assemblyline-v4-service 4.4.0.24__py3-none-any.whl → 4.4.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

Files changed (42) hide show
  1. assemblyline_v4_service/VERSION +1 -1
  2. assemblyline_v4_service/common/api.py +3 -2
  3. assemblyline_v4_service/common/base.py +3 -4
  4. assemblyline_v4_service/common/helper.py +1 -2
  5. assemblyline_v4_service/common/{extractor/ocr.py → ocr.py} +0 -1
  6. assemblyline_v4_service/common/ontology_helper.py +7 -8
  7. assemblyline_v4_service/common/request.py +4 -5
  8. assemblyline_v4_service/common/result.py +3 -3
  9. assemblyline_v4_service/common/task.py +3 -3
  10. assemblyline_v4_service/common/utils.py +2 -2
  11. assemblyline_v4_service/updater/helper.py +4 -0
  12. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/METADATA +1 -1
  13. assemblyline_v4_service-4.4.0.26.dist-info/RECORD +28 -0
  14. assemblyline_v4_service/common/balbuzard/__init__.py +0 -0
  15. assemblyline_v4_service/common/balbuzard/balbuzard.py +0 -656
  16. assemblyline_v4_service/common/balbuzard/bbcrack.py +0 -830
  17. assemblyline_v4_service/common/balbuzard/patterns.py +0 -650
  18. assemblyline_v4_service/common/dynamic_service_helper.py +0 -3631
  19. assemblyline_v4_service/common/extractor/__init__.py +0 -1
  20. assemblyline_v4_service/common/extractor/base64.py +0 -86
  21. assemblyline_v4_service/common/extractor/pe_file.py +0 -51
  22. assemblyline_v4_service/common/icap.py +0 -149
  23. assemblyline_v4_service/common/keytool_parse.py +0 -66
  24. assemblyline_v4_service/common/pestudio/__init__.py +0 -0
  25. assemblyline_v4_service/common/pestudio/xml/__init__.py +0 -0
  26. assemblyline_v4_service/common/pestudio/xml/features.xml +0 -5607
  27. assemblyline_v4_service/common/pestudio/xml/functions.xml +0 -5824
  28. assemblyline_v4_service/common/pestudio/xml/languages.xml +0 -375
  29. assemblyline_v4_service/common/pestudio/xml/resources.xml +0 -511
  30. assemblyline_v4_service/common/pestudio/xml/signatures.xml +0 -29105
  31. assemblyline_v4_service/common/pestudio/xml/strings.xml +0 -2379
  32. assemblyline_v4_service/common/safelist_helper.py +0 -73
  33. assemblyline_v4_service/common/section_reducer.py +0 -43
  34. assemblyline_v4_service/common/tag_helper.py +0 -117
  35. assemblyline_v4_service/common/tag_reducer.py +0 -242
  36. assemblyline_v4_service/testing/__init__.py +0 -0
  37. assemblyline_v4_service/testing/helper.py +0 -463
  38. assemblyline_v4_service/testing/regenerate_results.py +0 -37
  39. assemblyline_v4_service-4.4.0.24.dist-info/RECORD +0 -53
  40. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/LICENCE.md +0 -0
  41. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/WHEEL +0 -0
  42. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/top_level.txt +0 -0
@@ -1,3631 +0,0 @@
1
- from datetime import datetime
2
- from hashlib import sha256
3
- from json import dumps
4
- from logging import getLogger
5
- from re import compile, escape, findall
6
- from re import match as re_match
7
- from re import sub
8
- from typing import Any, Dict, List, Optional, Union
9
- from urllib.parse import urlparse
10
- from uuid import UUID, uuid4
11
-
12
- from assemblyline.common import log as al_log
13
- from assemblyline.common.attack_map import attack_map, group_map, revoke_map, software_map
14
- from assemblyline.common.digests import get_sha256_for_file
15
- from assemblyline.common.isotime import LOCAL_FMT, MAX_TIME, MIN_TIME, epoch_to_local, format_time, local_to_epoch
16
- from assemblyline.common.uid import get_random_id
17
- from assemblyline.odm.base import DOMAIN_REGEX, FULL_URI, IP_REGEX, IPV4_REGEX, URI_PATH
18
- from assemblyline.odm.models.ontology.results import NetworkConnection as NetworkConnectionModel
19
- from assemblyline.odm.models.ontology.results import Process as ProcessModel
20
- from assemblyline.odm.models.ontology.results import Sandbox as SandboxModel
21
- from assemblyline.odm.models.ontology.results import Signature as SignatureModel
22
-
23
- # from assemblyline_v4_service.common.balbuzard.patterns import PatternMatch
24
- from assemblyline_v4_service.common.base import ServiceBase
25
- from assemblyline_v4_service.common.request import ServiceRequest
26
- from assemblyline_v4_service.common.result import (
27
- ProcessItem,
28
- ResultProcessTreeSection,
29
- ResultSection,
30
- ResultTableSection,
31
- TableRow,
32
- )
33
- from assemblyline_v4_service.common.safelist_helper import URL_REGEX, is_tag_safelisted
34
- from assemblyline_v4_service.common.tag_helper import add_tag
35
- from assemblyline_v4_service.common.task import MaxExtractedExceeded
36
-
37
# Initialise logging for this helper and grab the module-level logger used by
# the functions and classes below.
al_log.init_logging("service.service_base.dynamic_service_helper")
log = getLogger("assemblyline.service.service_base.dynamic_service_helper")

# Architecture identifiers; they are the keys of ARCH_SPECIFIC_DEFAULTS and the
# values returned by Process._determine_arch.
X86_64 = "x86_64"
X86 = "x86"

# Lower-case Windows path fragments consumed by Process._normalize_path.
# The "*" in the SZ_USR_* values is a wildcard: Process._regex_substitution
# turns it into "one or more non-backslash characters".
SYSTEM_DRIVE = "c:\\"
SYSTEM_ROOT = "c:\\windows\\"
WINDIR_ENV_VARIABLE = "%windir%"
SAMPLEPATH_ENV_VARIABLE = "%samplepath%"
SZ_USR_TEMP_PATH = "users\\*\\appdata\\local\\temp\\"
SZ_USR_PATH = "users\\*\\"
# Per-architecture directory names. Note that the X86 entry has no "64" keys,
# which is why Process._normalize_path only reads them when arch == X86_64.
ARCH_SPECIFIC_DEFAULTS = {
    X86_64: {
        "szProgFiles86": "program files (x86)",
        "szProgFiles64": "program files",
        "szSys86": "syswow64",
        "szSys64": "system32",
    },
    X86: {"szProgFiles86": "program files", "szSys86": "system32"},
}

# Patterns matching HollowsHunter dump file names (.exe / .dll variants).
# NOTE(review): not referenced in the part of the module visible here.
HOLLOWSHUNTER_EXE_REGEX = r"[0-9]{1,}_hollowshunter\/hh_process_[0-9]{3,}_[0-9a-z]{3,}(\.[a-zA-Z0-9]{2,})*\.exe$"
HOLLOWSHUNTER_DLL_REGEX = r"[0-9]{1,}_hollowshunter\/hh_process_[0-9]{3,}_[0-9a-z]{3,}(\.[a-zA-Z0-9]{2,})*\.dll$"

HOLLOWSHUNTER_TITLE = "HollowsHunter Injected Portable Executable"

# Length thresholds for domains / URIs.
# NOTE(review): presumably used when extracting IOCs from text; the consumers
# are outside the visible part of the module.
MIN_DOMAIN_CHARS = 8
# Choosing an arbitrary number, based on https://webmasters.stackexchange.com/questions/16996/maximum-domain-name-length
MAX_DOMAIN_CHARS = 100
MIN_URI_CHARS = 11
MIN_URI_PATH_CHARS = 4

# There are samples that inject themselves for the entire analysis time
# and have the potential to exceed depths of 1000. Also, the assumption with 10 is that no process
# tree would be that complex and useful at the same time.
PROCESS_TREE_DEPTH_LIMIT = 10
# Attribute names that belong to an ObjectID. Process.update strips these keys
# from its kwargs before updating the Process's own attributes.
OBJECTID_KEYS = [
    "tag",
    "ontology_id",
    "service_name",
    "guid",
    "treeid",
    "processtree",
    "time_observed",
    "session",
]
# "p"-prefixed (parent) counterparts of OBJECTID_KEYS; the list additionally
# repeats the un-prefixed "ontology_id" and "service_name".
POBJECTID_KEYS = [
    "ptag",
    "pontology_id",
    "pservice_name",
    "pguid",
    "ptreeid",
    "pprocesstree",
    "ptime_observed",
    "psession",
    "ontology_id",
    "service_name",
]

# Rebind the imported MAX_TIME / MIN_TIME to their LOCAL_FMT-formatted form.
# update_object_items treats attributes equal to either value as placeholders
# that may be overwritten.
MAX_TIME = format_time(MAX_TIME, LOCAL_FMT)
MIN_TIME = format_time(MIN_TIME, LOCAL_FMT)

# Module-wide fallback service name: ObjectID uses it when no service_name is
# passed, and raises ValueError when it is still None at that point.
SERVICE_NAME = None

# The following lists of domains and top-level domains are used for finding false-positives
# when extracting domains from text blobs
COMMON_FP_DOMAINS = ["example.com"]
COMMON_FP_TLDS_THAT_ARE_FILE_EXTS = [".one", ".pub", ".py", ".sh", ".zip"]
COMMON_FP_TLDS_THAT_ARE_JS_COMMANDS = [".test", ".id", ".call", ".top", ".map", ".support", ".run", ".shell", ".net", ".stream"]
COMMON_FP_TLDS = COMMON_FP_TLDS_THAT_ARE_FILE_EXTS + COMMON_FP_TLDS_THAT_ARE_JS_COMMANDS

# Arbitrarily chosen common URL schemes from https://en.wikipedia.org/wiki/List_of_URI_schemes
COMMON_SCHEMES = [
    "dns", "dntp", "file", "ftp", "git", "http", "https", "icap", "imap", "irc", "irc6", "ircs", "nfs", "rdp",
    "s3", "sftp", "shttp", "smb", "sms", "snmp", "ssh", "telnet", "tftp", "udp",
]
114
-
115
-
116
def set_required_argument(self: object, name: str, value: Any, value_type: Any) -> None:
    """
    Validate a mandatory value and, when it passes, store it as an attribute of the given object
    :param self: The object that receives the attribute
    :param name: The attribute name (also used in the error messages)
    :param value: The candidate value; any falsy value (None, "", 0, [], ...) is rejected
    :param value_type: The type (or tuple of types) the value has to be an instance of
    :raises ValueError: if the value is falsy
    :raises TypeError: if the value is truthy but of the wrong type
    :return: None
    """
    # Emptiness is checked before the type, so a falsy value of the wrong type
    # is reported as a ValueError rather than a TypeError.
    if not value:
        raise ValueError(f"{name} must have a legitimate value")

    if not isinstance(value, value_type):
        raise TypeError(f"{name} must be a {value_type}")

    setattr(self, name, value)
131
-
132
-
133
def set_optional_argument(self, name: str, value: Any, value_type: Any) -> None:
    """
    Validate an optional value and, when it passes, store it as an attribute of the given object
    :param self: The object that receives the attribute
    :param name: The attribute name (also used in the error messages)
    :param value: The candidate value; None is accepted and stored as-is
    :param value_type: The type (or tuple of types) a non-None value has to be an instance of
    :raises ValueError: if the value is falsy but not None (e.g. "", 0, [])
    :raises TypeError: if the value is truthy but of the wrong type
    :return: None
    """
    # "Not provided" is legal for optional arguments: record the None and stop.
    if value is None:
        setattr(self, name, value)
        return

    # Anything else must be a real (truthy) value of the expected type.
    if not value:
        raise ValueError(f"{name} must have a legitimate value")
    if not isinstance(value, value_type):
        raise TypeError(f"{name} must be a {value_type}")

    setattr(self, name, value)
148
-
149
-
150
def update_object_items(self: object, update_items: Dict[str, Any]) -> None:
    """
    Fill in the still-empty attributes of an object from a dictionary of candidate values
    :param self: The object whose attributes are updated
    :param update_items: A dictionary mapping attribute names to the values they should receive
    :return: None
    """
    # Nothing to do when no candidate carries a value
    if all(candidate is None for candidate in update_items.values()):
        return

    for attribute, candidate in update_items.items():
        # Empty candidates never replace anything
        if candidate is None or candidate == "":
            continue

        # Unknown attributes are reported and skipped; this function never creates new ones
        if not hasattr(self, attribute):
            log.warning(
                f"{self.__class__} does not have the attribute {attribute}. Ignoring..."
            )
            continue

        # DO NOT OVERWRITE DATA (UNLESS ITS EMPTY): only placeholder values get replaced
        if getattr(self, attribute) in ("", None, [], {}, (), MAX_TIME, MIN_TIME):
            setattr(self, attribute, candidate)
178
-
179
-
180
class Artifact:
    """
    Container describing a single artifact (file) that was found in a sandbox
    """

    def __init__(
        self,
        name: str = None,
        path: str = None,
        description: str = None,
        to_be_extracted: bool = None,
        sha256: str = None,
    ):
        """
        Create an artifact record; every argument except sha256 is mandatory
        :param name: The name of the artifact
        :param path: The path of the artifact
        :param description: The description of the artifact
        :param to_be_extracted: A flag indicating if the artifact should be extracted or added as a supplementary file
        :param sha256: The SHA256 hash of the artifact's contents
        :raises Exception: if name, path, description or to_be_extracted is None
        """
        # The defaults are None only so that keyword calls work; a missing
        # mandatory argument is still an error. Falsy values other than None
        # (e.g. to_be_extracted=False) are legitimate.
        for mandatory in (name, path, description, to_be_extracted):
            if mandatory is None:
                raise Exception("Missing positional arguments for Artifact validation")

        self.name = name
        self.path = path
        self.description = description
        self.to_be_extracted = to_be_extracted
        self.sha256 = sha256

    def as_primitives(self) -> Dict[str, Any]:
        """
        Export the artifact as a plain dictionary
        :return: A new dictionary holding a shallow copy of the instance attributes
        """
        return dict(self.__dict__)
216
-
217
-
218
class ObjectID:
    def __init__(
        self,
        tag: str,
        ontology_id: str,
        service_name: Optional[str] = None,
        guid: Optional[str] = None,
        treeid: Optional[str] = None,
        processtree: Optional[str] = None,
        time_observed: Optional[str] = None,
        session: Optional[str] = None,
    ) -> None:
        """
        Build the set of characteristics that identify an object
        :param tag: The normalized tag of the object
        :param ontology_id: Unique identifier of ontology
        :param service_name: Component that generated this section; falls back to the module-level SERVICE_NAME
        :param guid: The GUID associated with the object
        :param treeid: The hash of the tree ID
        :param processtree: Human-readable tree ID (concatenation of tags)
        :param time_observed: The time at which the object was observed
        :param session: Unifying session name/ID
        :raises ValueError: if no service name is available, or if guid is not a valid UUID string
        :return: None
        """
        # Mandatory identifiers
        for attribute, given in (("tag", tag), ("ontology_id", ontology_id)):
            set_required_argument(self, attribute, given, str)

        # The service name may come from the caller or from the module default
        set_optional_argument(self, "service_name", service_name, str)
        if not self.service_name:
            if SERVICE_NAME is None:
                raise ValueError("The service_name must be set")
            if SERVICE_NAME:
                self.service_name = SERVICE_NAME

        set_optional_argument(self, "guid", guid, str)
        if self.guid:
            # Enforce the upper-case, brace-wrapped format for all given guids
            self.guid = "{" + str(UUID(guid)).upper() + "}"

        remaining_optionals = (
            ("treeid", treeid),
            ("processtree", processtree),
            ("time_observed", time_observed),
            ("session", session),
        )
        for attribute, given in remaining_optionals:
            set_optional_argument(self, attribute, given, str)

    def as_primitives(self) -> Dict[str, Any]:
        """
        Export the object ID as a plain dictionary
        :return: A new dictionary holding a shallow copy of the instance attributes
        """
        return dict(self.__dict__)

    def assign_guid(self) -> None:
        """
        Generate a fresh random GUID (upper-case, wrapped in braces) and store it on the object
        :return: None
        """
        self.guid = "{" + str(uuid4()).upper() + "}"

    def set_tag(self, tag: str) -> None:
        """
        Replace the tag of the ObjectID; anything that is not a non-empty string is silently ignored
        :param tag: The tag of the ObjectID
        :return: None
        """
        if isinstance(tag, str) and tag:
            self.tag = tag

    def set_time_observed(self, time_observed: str) -> None:
        """
        Replace the time_observed of the ObjectID
        :param time_observed: The time_observed of the ObjectID, parseable with LOCAL_FMT once
                              everything from the first "." onwards has been dropped
        :raises ValueError: if time_observed is empty (or cannot be parsed)
        :raises TypeError: if time_observed is not a string
        :return: None
        """
        if not time_observed:
            raise ValueError("time_observed must have a legitimate value")
        if not isinstance(time_observed, str):
            raise TypeError("time_observed must be a str")

        # Keep only what precedes the first "." before parsing
        without_fraction = time_observed.partition(".")[0]
        parsed = datetime.strptime(without_fraction, LOCAL_FMT)
        self.time_observed = str(parsed)
299
-
300
-
301
class Process:
    def __init__(
        self,
        objectid: ObjectID,
        image: str,
        start_time: Optional[str] = None,
        pobjectid: Optional[ObjectID] = None,
        pimage: Optional[str] = None,
        pcommand_line: Optional[str] = None,
        ppid: Optional[int] = None,
        pid: Optional[int] = None,
        command_line: Optional[str] = None,
        end_time: Optional[str] = None,
        integrity_level: Optional[str] = None,
        image_hash: Optional[str] = None,
        original_file_name: Optional[str] = None,
    ) -> None:
        """
        This method initializes a process object
        :param objectid: The object ID of the process object
        :param image: The image of the process
        :param start_time: The time of creation for the process
        :param pobjectid: The object ID of the parent process object
        :param pimage: The image of the parent process that spawned this process
        :param pcommand_line: The command line that the parent process ran
        :param ppid: The process ID of the parent process
        :param pid: The process ID
        :param command_line: The command line that the process ran
        :param end_time: The time of termination for the process
        :param integrity_level: The integrity level of the process (stored lower-cased)
        :param image_hash: The hash of the file run
        :param original_file_name: The original name of the file
        :raises ValueError: if start_time is later than end_time, if pid equals ppid,
                            or if a required/optional argument is empty
        :raises TypeError: if an argument has the wrong type
        :return: None
        """
        if (
            start_time
            and end_time
            and local_to_epoch(start_time) > local_to_epoch(end_time)
        ):
            raise ValueError(
                f"Start time {start_time} cannot be greater than end time {end_time}."
            )

        if pid and ppid and pid == ppid:
            raise ValueError(f"PID {pid} cannot be equal to its PPID")

        set_required_argument(self, "objectid", objectid, ObjectID)
        set_required_argument(self, "image", image, str)
        set_optional_argument(self, "start_time", start_time, str)
        # The process start time doubles as the observation time when none was given
        if self.objectid and self.start_time and self.objectid.time_observed is None:
            self.objectid.time_observed = self.start_time

        # Parent process details
        set_optional_argument(self, "pobjectid", pobjectid, ObjectID)
        set_optional_argument(self, "pimage", pimage, str)
        set_optional_argument(self, "pcommand_line", pcommand_line, str)
        set_optional_argument(self, "ppid", ppid, int)

        set_optional_argument(self, "pid", pid, int)
        set_optional_argument(self, "command_line", command_line, str)
        set_optional_argument(self, "end_time", end_time, str)

        set_optional_argument(self, "integrity_level", integrity_level, str)
        if self.integrity_level:
            self.integrity_level = self.integrity_level.lower()

        set_optional_argument(self, "image_hash", image_hash, str)
        set_optional_argument(self, "original_file_name", original_file_name, str)

    def as_primitives(self) -> Dict[str, Any]:
        """
        This method returns the dictionary representation of the object
        :return: The dictionary representation of the object, with any ObjectID
                 attribute converted to its own dictionary representation
        """
        return {
            key: value if not isinstance(value, ObjectID) else value.as_primitives()
            for key, value in self.__dict__.items()
        }

    def update(self, **kwargs) -> None:
        """
        This method updates attributes with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating attributes. "objectid" and
                       "pobjectid" may be ObjectID instances or dictionaries and are merged
                       into the existing object IDs; attributes that already hold data are
                       not overwritten
        :return: None
        """
        if all(value is None for value in kwargs.values()):
            return

        if "objectid" in kwargs:
            objectid = kwargs.pop("objectid")
            if objectid and isinstance(objectid, ObjectID):
                self.update_objectid(**objectid.as_primitives())
            elif isinstance(objectid, Dict):
                self.update_objectid(**objectid)

        if "pobjectid" in kwargs:
            pobjectid = kwargs.pop("pobjectid")
            if pobjectid and isinstance(pobjectid, ObjectID):
                self.update_pobjectid(**pobjectid.as_primitives())
            elif isinstance(pobjectid, Dict):
                self.update_pobjectid(**pobjectid)

        # Only a real start time can seed the observation time. An explicit
        # start_time=None alongside other values is "no information", not an
        # error, so it must not reach set_time_observed (which rejects it).
        start_time = kwargs.get("start_time")
        if start_time and self.objectid.time_observed is None:
            self.objectid.set_time_observed(start_time)

        if "integrity_level" in kwargs and isinstance(kwargs["integrity_level"], str):
            kwargs["integrity_level"] = kwargs["integrity_level"].lower()

        # Remove objectid attributes
        kwargs = {
            key: value
            for key, value in kwargs.items()
            if key not in OBJECTID_KEYS and key not in POBJECTID_KEYS
        }
        update_object_items(self, kwargs)

    def set_parent(self, parent: object) -> None:
        """
        This method sets the parent details for the process
        :param parent: The Process object for the parent process; None or the process itself is ignored
        :return: None
        """
        if parent is None or parent == self:
            return
        self.pobjectid = parent.objectid
        self.pimage: str = parent.image
        # An already known parent command line is kept
        if self.pcommand_line is None:
            self.pcommand_line: str = parent.command_line
        self.ppid: int = parent.pid

    def set_start_time(self, start_time: str) -> None:
        """
        This method updates the start time for the Process
        :param start_time: The start time of the Process
        :return: None
        """
        self.start_time = start_time

    def set_end_time(self, end_time: str) -> None:
        """
        This method updates the end time for the Process
        :param end_time: The end time of the Process
        :return: None
        """
        self.end_time = end_time

    def is_guid_a_match(self, guid: str) -> bool:
        """
        This method confirms if a given GUID matches the Process object's GUID
        :param guid: The GUID requested to confirm a match
        :return: A boolean flag representing if the GUID matched; False when the
                 given value cannot be interpreted as a GUID at all
        """
        try:
            return self.objectid.guid == f"{{{str(UUID(guid)).upper()}}}"
        except (ValueError, TypeError, AttributeError):
            # ValueError: malformed string. TypeError / AttributeError: UUID()
            # was handed None or another non-string value. None of these can match.
            return False

    def set_objectid_tag(self, image: Optional[str]) -> None:
        """
        This method normalizes the image path and sets the objectid tag
        :param image: The image path to normalize; an empty value leaves the tag untouched
        :return: None
        """
        if not image:
            return
        self.objectid.set_tag(Process.create_objectid_tag(image))

    @staticmethod
    def create_objectid_tag(image: Optional[str]) -> Optional[str]:
        """
        This method normalizes the image path and creates the objectid tag
        :param image: The image path to normalize
        :return: The normalized image path, or None if no image was given
        """
        if not image:
            return

        return Process._normalize_path(image)

    def set_pobjectid_tag(self, image: Optional[str]) -> None:
        """
        This method normalizes the image path and sets the pobjectid tag
        :param image: The parent image path to normalize; an empty value leaves the tag untouched
        :return: None
        """
        if not image:
            return
        if not self.pobjectid:
            log.debug("You need to set pobjectid before setting its tag")
            return
        self.pobjectid.set_tag(Process._normalize_path(image))

    def update_objectid(self, **kwargs) -> None:
        """
        This method updates the process objectid attributes with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the process objectid attributes
        :return: None
        """
        if all(value is None for value in kwargs.values()):
            return

        if kwargs.get("guid"):
            try:
                kwargs["guid"] = f"{{{str(UUID(kwargs['guid'])).upper()}}}"
            except ValueError:
                # The invalid GUID is dropped so that it is not stored
                log.warning(f"Invalid GUID '{kwargs.pop('guid')}'")

        update_object_items(self.objectid, kwargs)

    def update_pobjectid(self, **kwargs) -> None:
        """
        This method updates the process pobjectid attributes with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the process pobjectid attributes.
                       When no pobjectid exists yet, "tag", "ontology_id" and "service_name"
                       must all be provided so that one can be created
        :return: None
        """
        if all(value is None for value in kwargs.values()):
            return

        if (
            not self.pobjectid
            and kwargs.get("tag")
            and kwargs.get("ontology_id")
            and kwargs.get("service_name")
        ):
            self.pobjectid: ObjectID = ObjectID(
                kwargs["tag"], kwargs["ontology_id"], kwargs["service_name"]
            )
        elif not self.pobjectid:
            log.debug("You need to set pobjectid or pass its required arguments")
            return

        if kwargs.get("guid"):
            try:
                kwargs["guid"] = f"{{{str(UUID(kwargs['guid'])).upper()}}}"
            except ValueError:
                # The invalid GUID is dropped so that it is not stored
                log.warning(f"Invalid GUID {kwargs.pop('guid')}")

        update_object_items(self.pobjectid, kwargs)

    @staticmethod
    def _determine_arch(path: str) -> str:
        """
        This method determines what architecture the operating system was built with where the event took place
        :param path: The file path of the image associated with an event
        :return: The architecture of the operating system
        """
        # Clear indicators in a file path of the architecture of the operating system
        if any(item in path for item in ["program files (x86)", "syswow64"]):
            return X86_64
        return X86

    @staticmethod
    def _pattern_substitution(path: str, rule: Dict[str, str]) -> str:
        """
        This method applies pattern rules for explicit string substitution
        :param path: The file path of the image associated with an event
        :param rule: The rule to be applied, containing a pattern and the replacement value
        :return: The modified path, if any rules applied
        """
        if path.startswith(rule["pattern"]):
            # The rule is a prefix rule: only the leading occurrence is swapped,
            # later occurrences of the same text in the path are left alone.
            path = path.replace(rule["pattern"], rule["replacement"], 1)
        return path

    @staticmethod
    def _regex_substitution(path: str, rule: Dict[str, str]) -> str:
        """
        This method applies a regular expression for implicit string substitution
        :param path: The file path of the image associated with an event
        :param rule: The rule to be applied, containing a wildcard pattern under "regex"
                     ("*" stands for one path component) and the replacement value.
                     The rule is not modified, so it can be reused
        :return: The modified path, if any rules applied
        """
        # Escape the literal parts and let every "*" match a run of
        # non-backslash characters. The pattern is built in a local variable:
        # writing it back into the rule would corrupt the rule for later calls.
        literal_parts = [escape(part) for part in rule["regex"].split("*")]
        pattern = "[^\\\\]+".join(literal_parts)
        return sub(pattern, rule["replacement"], path)

    @staticmethod
    def _normalize_path(path: str, arch: Optional[str] = None) -> str:
        """
        This method determines what rules should be applied based on architecture and then applies the rules to the path
        :param path: The file path of the image associated with an event
        :param arch: The architecture of the operating system; guessed from the path when not given
        :return: The modified (lower-cased) path, if any rules applied
        """
        path = path.lower()
        if not arch:
            arch = Process._determine_arch(path)

        # Order here matters: the most specific prefixes come first so that the
        # generic SYSTEM_ROOT / SYSTEM_DRIVE rules only catch what is left over
        rules: List[Dict[str, str]] = []
        rules.append(
            {
                "pattern": SYSTEM_ROOT + ARCH_SPECIFIC_DEFAULTS[arch]["szSys86"],
                "replacement": "?sys32",
            }
        )
        if arch == X86_64:
            rules.append(
                {
                    "pattern": SYSTEM_ROOT + ARCH_SPECIFIC_DEFAULTS[arch]["szSys64"],
                    "replacement": "?sys64",
                }
            )
        rules.append(
            {
                "pattern": SYSTEM_DRIVE + ARCH_SPECIFIC_DEFAULTS[arch]["szProgFiles86"],
                "replacement": "?pf86",
            }
        )
        if arch == X86_64:
            rules.append(
                {
                    "pattern": SYSTEM_DRIVE
                    + ARCH_SPECIFIC_DEFAULTS[arch]["szProgFiles64"],
                    "replacement": "?pf64",
                }
            )
        rules.append(
            {"regex": f"{SYSTEM_DRIVE}{SZ_USR_TEMP_PATH}", "replacement": "?usrtmp\\\\"}
        )
        rules.append(
            {"regex": f"{SYSTEM_DRIVE}{SZ_USR_PATH}", "replacement": "?usr\\\\"}
        )
        rules.append({"pattern": SYSTEM_ROOT, "replacement": "?win\\"})
        rules.append({"pattern": SYSTEM_DRIVE, "replacement": "?c\\"})
        rules.append({"pattern": WINDIR_ENV_VARIABLE, "replacement": "?win"})
        rules.append({"pattern": SAMPLEPATH_ENV_VARIABLE, "replacement": "?usrtmp"})
        for rule in rules:
            if "pattern" in rule:
                path = Process._pattern_substitution(path, rule)
            if "regex" in rule:
                path = Process._regex_substitution(path, rule)
        return path
632
-
633
-
634
class NetworkDNS:
    def __init__(
        self,
        domain: str,
        resolved_ips: List[str],
        lookup_type: str,
    ) -> None:
        """
        Record the details of a single DNS request; all three arguments are mandatory
        :param domain: The domain requested
        :param resolved_ips: A list of IPs that were resolved
        :param lookup_type: The type of DNS request
        :raises ValueError: if an argument is empty
        :raises TypeError: if an argument has the wrong type
        :return: None
        """
        mandatory_fields = (
            ("domain", domain, str),
            ("resolved_ips", resolved_ips, List),
            ("lookup_type", lookup_type, str),
        )
        for attribute, given, expected_type in mandatory_fields:
            set_required_argument(self, attribute, given, expected_type)

    def as_primitives(self) -> Dict[str, Any]:
        """
        Export the DNS details as a plain dictionary
        :return: A new dictionary holding a shallow copy of the instance attributes
        """
        return dict(self.__dict__)
658
-
659
-
660
class NetworkHTTP:
    def __init__(
        self,
        request_uri: str,
        request_method: str,
        request_headers: Optional[Dict[str, str]] = None,
        response_headers: Optional[Dict[str, str]] = None,
        request_body: Optional[str] = None,
        response_status_code: Optional[int] = None,
        response_body: Optional[str] = None,
        request_body_path: Optional[str] = None,
        response_body_path: Optional[str] = None,
    ) -> None:
        """
        Record the details of a single HTTP request and its response
        :param request_uri: The URI requested
        :param request_method: The method of the request
        :param request_headers: Headers included in the request (an empty dict is stored when omitted)
        :param response_headers: The headers of the response (an empty dict is stored when omitted)
        :param request_body: The body of the request
        :param response_status_code: The status code of the response
        :param response_body: The body of the response
        :param request_body_path: The path to the file containing the request body
        :param response_body_path: The path to the file containing the response body
        :raises ValueError: if an argument is empty
        :raises TypeError: if an argument has the wrong type
        :return: None
        """
        set_required_argument(self, "request_uri", request_uri, str)
        set_required_argument(self, "request_method", request_method, str)

        # Header collections are validated one after the other and default to an empty dict
        header_fields = (
            ("request_headers", request_headers),
            ("response_headers", response_headers),
        )
        for attribute, given in header_fields:
            set_optional_argument(self, attribute, given, Dict)
            if not getattr(self, attribute):
                setattr(self, attribute, {})

        remaining_optionals = (
            ("request_body", request_body, str),
            ("response_status_code", response_status_code, int),
            ("response_body", response_body, str),
            ("request_body_path", request_body_path, str),
            ("response_body_path", response_body_path, str),
        )
        for attribute, given, expected_type in remaining_optionals:
            set_optional_argument(self, attribute, given, expected_type)

    def update(self, **kwargs) -> None:
        """
        Fill in still-empty networkhttp attributes from the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the networkhttp attributes
        :return: None
        """
        # Only delegate when at least one keyword actually carries a value
        if any(value is not None for value in kwargs.values()):
            update_object_items(self, kwargs)

    def as_primitives(self) -> Dict[str, Any]:
        """
        Export the HTTP details as a plain dictionary
        :return: A new dictionary of the instance attributes, without the two
                 body-path attributes (request_body_path / response_body_path)
        """
        exported = dict(self.__dict__)
        for path_attribute in ("request_body_path", "response_body_path"):
            exported.pop(path_attribute, None)
        return exported
723
-
724
-
725
class NetworkConnection:
    """
    A low-level network connection by IP, optionally linked to the process that made it
    and to HTTP- or DNS-specific details.
    """

    # Accepted values for the direction of a connection
    OUTBOUND = "outbound"
    INBOUND = "inbound"
    UNKNOWN = "unknown"
    DIRECTIONS = [OUTBOUND, INBOUND, UNKNOWN]
    # Accepted transport layer protocols
    TCP = "tcp"
    UDP = "udp"
    TRANSPORT_LAYER_PROTOCOL = [TCP, UDP]
    # Accepted connection types
    HTTP = "http"
    DNS = "dns"
    CONNECTION_TYPES = [HTTP, DNS]

    def __init__(
        self,
        objectid: ObjectID,
        destination_ip: str,
        destination_port: int,
        transport_layer_protocol: str,
        direction: str,
        process: Optional[Process] = None,
        source_ip: Optional[str] = None,
        source_port: Optional[int] = None,
        http_details: Optional[NetworkHTTP] = None,
        dns_details: Optional[NetworkDNS] = None,
        connection_type: Optional[str] = None,
    ) -> None:
        """
        Details for a low-level network connection by IP
        :param objectid: The object ID of the network object
        :param destination_ip: The destination IP of the connection
        :param destination_port: The destination port of the connection
        :param transport_layer_protocol: The transport layer protocol of the connection
        :param direction: The direction of the network connection
        :param process: The process that spawned the network connection
        :param source_ip: The source IP of the connection
        :param source_port: The source port of the connection
        :param http_details: HTTP-specific details of request
        :param dns_details: DNS-specific details of request
        :param connection_type: Type of connection being made
        :raises ValueError: If the protocol, direction or connection type is not an accepted value, or if
                            the connection type and the HTTP/DNS details are inconsistent with each other
        :return: None
        """
        if transport_layer_protocol not in self.TRANSPORT_LAYER_PROTOCOL:
            raise ValueError(
                f"Invalid transport layer protocol: {transport_layer_protocol}"
            )

        if direction not in self.DIRECTIONS:
            raise ValueError(f"Invalid direction: {direction}")

        set_required_argument(self, "objectid", objectid, ObjectID)
        set_required_argument(self, "destination_ip", destination_ip, str)
        set_required_argument(self, "destination_port", destination_port, int)
        set_required_argument(
            self, "transport_layer_protocol", transport_layer_protocol, str
        )
        set_required_argument(self, "direction", direction, str)

        set_optional_argument(self, "process", process, Process)
        set_optional_argument(self, "source_ip", source_ip, str)
        set_optional_argument(self, "source_port", source_port, int)
        set_optional_argument(self, "http_details", http_details, NetworkHTTP)
        set_optional_argument(self, "dns_details", dns_details, NetworkDNS)
        # A single connection carries either HTTP or DNS details, never both
        if self.http_details and self.dns_details:
            raise ValueError(
                "A network connection cannot be associated to both a DNS and an HTTP call."
            )
        set_optional_argument(self, "connection_type", connection_type, str)
        if self.connection_type:
            # A declared connection type must be known and backed by the matching details
            if self.connection_type not in self.CONNECTION_TYPES:
                raise ValueError(
                    f"Connection type {self.connection_type} must be one of {self.CONNECTION_TYPES}"
                )
            elif self.connection_type == self.HTTP and self.http_details is None:
                raise ValueError(
                    f"Connection type is {self.HTTP} but {self.HTTP}_details is None"
                )
            elif self.connection_type == self.DNS and self.dns_details is None:
                raise ValueError(
                    f"Connection type is {self.DNS} but {self.DNS}_details is None"
                )
        else:
            # Details without a declared connection type are rejected
            if self.http_details or self.dns_details:
                raise ValueError("Specify the connection type")

    def update_objectid(self, **kwargs) -> None:
        """
        This method updates the network connection objectid attributes with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the network connection objectid attributes
        :return: None
        """
        # Nothing to do when every provided value is None (or when nothing was provided)
        if all(value is None for value in kwargs.values()):
            return
        update_object_items(self.objectid, kwargs)

    def update(self, **kwargs) -> None:
        """
        This method updates attributes with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating attributes. "objectid" may be an ObjectID
                       or a dictionary, and "process" may be a Process or a dictionary
        :return: None
        """
        if all(value is None for value in kwargs.values()):
            return

        if "objectid" in kwargs:
            objectid = kwargs.pop("objectid")
            if objectid and isinstance(objectid, ObjectID):
                self.update_objectid(**objectid.as_primitives())
            elif objectid and isinstance(objectid, Dict):
                self.update_objectid(**objectid)
        else:
            # Get the objectid attributes out
            objectid_kwargs = {
                key: value for key, value in kwargs.items() if key in OBJECTID_KEYS
            }
            self.update_objectid(**objectid_kwargs)

        if "process" in kwargs:
            process = kwargs.pop("process")
            if process:
                if isinstance(process, Process):
                    self.set_process(process)
                elif isinstance(process, Dict):
                    self.update_process(**process)

        # Remove objectid attributes
        kwargs = {
            key: value for key, value in kwargs.items() if key not in OBJECTID_KEYS
        }
        update_object_items(self, kwargs)

    def update_process(self, **kwargs) -> None:
        """
        This method updates the process object attribute with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the process object attribute
        :return: None
        """
        if (
            not self.process
            and kwargs.get("objectid")
            and kwargs.get("image")
            and kwargs.get("start_time")
        ):
            # No process yet, but enough details were given to create one
            self.process: Process = Process(
                kwargs["objectid"], kwargs["image"], kwargs["start_time"]
            )
        elif not self.process:
            log.debug("You need to set process or pass its required arguments")
            return
        self.process.update(**kwargs)

    def update_process_objectid(self, **kwargs) -> None:
        """
        This method updates the process ObjectID with the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the process object attribute
        :raises ValueError: If no process is set on this network connection
        :return: None
        """
        if not self.process:
            raise ValueError(
                "Process must be set before you can update the process ObjectID"
            )
        self.process.update_objectid(**kwargs)

    def set_process(self, process: Process) -> None:
        """
        This method sets the process object attribute to the given process
        :param process: The given process object
        :return: None
        """
        self.process = process

    @staticmethod
    def create_tag(
        destination_ip: Optional[str] = None,
        destination_port: Optional[int] = None,
        domain: Optional[str] = None,
        direction: Optional[str] = None,
    ) -> Optional[str]:
        """
        This method creates the tag object for a network connection
        :param destination_ip: The destination IP of the connection
        :param destination_port: The destination port of the connection
        :param domain: The domain associated with the destination IP used in this network connection
        :param direction: The direction of the network connection
        :return: The created tag, if any
        """
        if not domain and destination_ip is None:
            log.debug(
                "Cannot set tag for network connection. Requires either domain or destination IP..."
            )
            return None
        if destination_port is None:
            log.debug(
                "Cannot set tag for network connection. Requires destination port..."
            )
            return None

        if domain and direction == NetworkConnection.OUTBOUND:
            return f"{domain}:{destination_port}"

        # If no domain or if direction is inbound/unknown, the tag is built from the destination IP.
        # Without an IP there is nothing valid to build, so do not emit a "None:<port>" tag.
        if destination_ip is None:
            log.debug(
                "Cannot set tag for network connection. Requires either domain or destination IP..."
            )
            return None
        return f"{destination_ip}:{destination_port}"

    def as_primitives(self) -> Dict[str, Any]:
        """
        This method returns the dictionary representation of the object
        :return: The dictionary representation of the object
        """
        # Nested ontology objects are converted through their own as_primitives()
        return {
            key: value
            if (
                not isinstance(value, Process)
                and not isinstance(value, ObjectID)
                and not isinstance(value, NetworkDNS)
                and not isinstance(value, NetworkHTTP)
            )
            else value.as_primitives()
            for key, value in self.__dict__.items()
        }
943
-
944
-
945
class Attribute:
    # The relations between a source and a target object that are accepted as an action
    actions = [
        "clipboard_capture",
        "create_remote_thread",
        "create_stream_hash",
        "dns_query",
        "driver_loaded",
        "file_change",
        "file_creation",
        "file_delete",
        "image_loaded",
        "network_connection",
        "network_connection_linux",
        "pipe_created",
        "process_access",
        "process_creation",
        "process_creation_linux",
        "process_tampering",
        "process_terminated",
        "raw_access_thread",
        "registry_add",
        "registry_delete",
        "registry_event",
        "registry_rename",
        "registry_set",
        "sysmon_error",
        "sysmon_status",
        "wmi_event",
    ]

    def __init__(
        self,
        source: ObjectID,
        target: Optional[ObjectID] = None,
        action: Optional[str] = None,
        meta: Optional[str] = None,
        event_record_id: Optional[str] = None,
        domain: Optional[str] = None,
        uri: Optional[str] = None,
        file_hash: Optional[str] = None,
    ) -> None:
        """
        Attribute relating to the signature that was raised during the analysis of the task
        :param source: Object that the rule triggered on
        :param target: Object targeted by the source object
        :param action: The relation between the source and target
        :param meta: Metadata about the detection
        :param event_record_id: Event Record ID (Event Logs)
        :param domain: Domain
        :param uri: URI
        :param file_hash: SHA256 of file
        :raises ValueError: If an action is given that is not one of the accepted actions
        :return: None
        """
        set_required_argument(self, "source", source, ObjectID)
        set_optional_argument(self, "target", target, ObjectID)

        set_optional_argument(self, "action", action, str)
        action_is_unknown = bool(self.action) and self.action not in self.actions
        if action_is_unknown:
            raise ValueError(
                f"The action {self.action} is not in the list of valid actions"
            )

        # The remaining optional details are all plain strings
        string_details = (
            ("meta", meta),
            ("event_record_id", event_record_id),
            ("domain", domain),
            ("uri", uri),
            ("file_hash", file_hash),
        )
        for detail_name, detail_value in string_details:
            set_optional_argument(self, detail_name, detail_value, str)

    def update(self, **kwargs) -> None:
        """
        Update this attribute object using the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the attribute object
        :return: None
        """
        update_object_items(self, kwargs)

    def as_primitives(self) -> Dict[str, Any]:
        """
        Build the dictionary representation of this object; ObjectID values are converted
        through their own as_primitives()
        :return: The dictionary representation of the object
        """
        primitives: Dict[str, Any] = {}
        for attr_name, attr_value in self.__dict__.items():
            if isinstance(attr_value, ObjectID):
                primitives[attr_name] = attr_value.as_primitives()
            else:
                primitives[attr_name] = attr_value
        return primitives
1031
-
1032
-
1033
class Signature:
    # The signature types that are accepted
    types = ["CUCKOO", "YARA", "SIGMA", "SURICATA"]

    def __init__(
        self,
        objectid: ObjectID,
        name: str,
        type: str,
        attributes: Optional[List[Attribute]] = None,
        attacks: Optional[List[Dict[str, Any]]] = None,
        actors: Optional[List[str]] = None,
        malware_families: Optional[List[str]] = None,
        score: Optional[int] = None,
    ) -> None:
        """
        A signature that was raised during the analysis of the task
        :param objectid: The object ID of the signature object
        :param name: The name of the signature
        :param type: Type of signature
        :param attributes: Attributes about the signature
        :param attacks: A list of ATT&CK patterns and categories of the signature
        :param actors: List of actors of the signature
        :param malware_families: List of malware families of the signature
        :param score: Score of the signature
        :raises ValueError: If the type is not one of the accepted types
        :return: None
        """
        set_required_argument(self, "objectid", objectid, ObjectID)
        set_required_argument(self, "name", name, str)
        set_required_argument(self, "type", type, str)
        if self.type not in self.types:
            raise ValueError(f"The type {self.type} is not a valid type")

        # Each list-valued attribute falls back to a fresh empty list when it ends up falsy:
        # attributes -> List[Attribute], attacks -> List[Dict[str, Any]],
        # actors -> List[str], malware_families -> List[str]
        list_arguments = (
            ("attributes", attributes),
            ("attacks", attacks),
            ("actors", actors),
            ("malware_families", malware_families),
        )
        for list_name, list_value in list_arguments:
            set_optional_argument(self, list_name, list_value, List)
            if not getattr(self, list_name):
                setattr(self, list_name, [])

        set_optional_argument(self, "score", score, int)

    def update(self, **kwargs) -> None:
        """
        Update this signature object using the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the signature object
        :return: None
        """
        update_object_items(self, kwargs)

    def add_attack_id(self, attack_id: str) -> None:
        """
        Add an Att&ck ID to the signature's list of Att&ck IDs. The ID is first translated through
        the revoke map, and it is ignored if the signature already has it.
        :param attack_id: The Att&ck ID to add
        :return: None
        """
        attack_id = revoke_map.get(attack_id, attack_id)
        known_attack_ids = [attack["attack_id"] for attack in self.attacks]
        if attack_id in known_attack_ids:
            return

        if attack_id in attack_map:
            pattern = attack_map[attack_id]["name"]
            categories = attack_map[attack_id]["categories"]
        elif attack_id in software_map:
            pattern = software_map[attack_id].get("name", attack_id)
            categories = ["software"]
        elif attack_id in group_map:
            pattern = group_map[attack_id].get("name", attack_id)
            categories = ["group"]
        else:
            # The ID is in none of the known maps
            log.warning(f"Could not generate Att&ck output for ID: {attack_id}")
            return

        self.attacks.append(
            {"attack_id": attack_id, "pattern": pattern, "categories": categories}
        )

    @staticmethod
    def create_attribute(**kwargs) -> Optional[Attribute]:
        """
        Create an Attribute, assign its attributes based on the keyword arguments provided,
        and return it. Nothing is created when every provided value is None.
        :param kwargs: Key word arguments to be used for updating the Attribute's attributes
        :raises ValueError: If "source" is missing/falsy or is not an ObjectID
        :return: Attribute object, if one was created
        """
        # We want to perform this backend check for Attribute kwargs since they have a high degree of variability
        has_only_none_values = all(value is None for value in kwargs.values())
        if has_only_none_values:
            return None

        source = kwargs.get("source")
        if not source:
            raise ValueError("The attribute needs its required arguments")
        if not isinstance(source, ObjectID):
            raise ValueError("source is not an ObjectID")

        new_attribute = Attribute(source=source)
        update_object_items(new_attribute, kwargs)
        return new_attribute

    def add_attribute(self, attribute: Attribute) -> None:
        """
        Add an attribute to the list of attributes for the signature, unless an attribute
        with the same dictionary representation is already present.
        :param attribute: The attribute to be added
        :return: None
        """
        for existing_attribute in self.attributes:
            if attribute.as_primitives() == existing_attribute.as_primitives():
                return

        self.attributes.append(attribute)

    def get_attributes(self) -> List[Attribute]:
        """
        Getter for the attributes associated with the signature
        :return: The list of attributes associated with the signature
        """
        return self.attributes

    def set_score(self, score: int) -> None:
        """
        Setter for the signature score
        :param score: The score to set
        :return: None
        """
        self.score: int = score

    def set_malware_families(self, malware_families: List[str]) -> None:
        """
        Setter for the signature malware families. Anything other than a list made only of
        strings results in an empty list being set.
        :param malware_families: The malware families to set
        :return: None
        """
        is_list_of_str = isinstance(malware_families, List) and all(
            isinstance(family, str) for family in malware_families
        )
        if is_list_of_str:
            self.malware_families: List[str] = malware_families
        else:
            self.malware_families = []

    def as_primitives(self) -> Dict[str, Any]:
        """
        Build the dictionary representation of this object
        :return: The dictionary representation of the object
        """
        primitives: Dict[str, Any] = {}
        primitives["objectid"] = self.objectid.as_primitives()
        primitives["name"] = self.name
        primitives["type"] = self.type
        primitives["attributes"] = [item.as_primitives() for item in self.attributes]
        primitives["attacks"] = self.attacks
        primitives["actors"] = self.actors
        primitives["malware_families"] = self.malware_families
        return primitives
1207
-
1208
-
1209
class Sandbox:
    class AnalysisMetadata:
        class MachineMetadata:
            def __init__(
                self,
                ip: Optional[str] = None,
                hypervisor: Optional[str] = None,
                hostname: Optional[str] = None,
                platform: Optional[str] = None,
                version: Optional[str] = None,
                architecture: Optional[str] = None,
            ) -> None:
                """
                The metadata regarding the machine where the analysis took place
                :param ip: The IP of the machine used for analysis
                :param hypervisor: The hypervisor of the machine used for analysis
                :param hostname: The name of the machine used for analysis
                :param platform: The platform of the machine used for analysis
                :param version: The version of the operating system of the machine used for analysis
                :param architecture: The architecture of the machine used for analysis
                """
                # Every machine detail is an optional string
                machine_details = (
                    ("ip", ip),
                    ("hypervisor", hypervisor),
                    ("hostname", hostname),
                    ("platform", platform),
                    ("version", version),
                    ("architecture", architecture),
                )
                for detail_name, detail_value in machine_details:
                    set_optional_argument(self, detail_name, detail_value, str)

            def as_primitives(self) -> Dict[str, Any]:
                """
                Build the dictionary representation of this object
                :return: The dictionary representation of the object
                """
                return dict(self.__dict__.items())

            def load_from_json(self, json: Dict[str, Any]) -> None:
                """
                Set the attributes of this object from the given json
                :param json: The given json representation of the machine metadata
                :return: None
                """
                for field in (
                    "ip",
                    "hypervisor",
                    "hostname",
                    "platform",
                    "version",
                    "architecture",
                ):
                    setattr(self, field, json[field])

        def __init__(
            self,
            start_time: Optional[str] = None,
            task_id: Optional[int] = None,
            end_time: Optional[str] = None,
            routing: Optional[str] = None,
            machine_metadata: Optional[MachineMetadata] = None,
        ) -> None:
            """
            The metadata of the analysis, per analysis
            :param start_time: The start time of the analysis
            :param task_id: The ID used for identifying the analysis task
            :param end_time: The end time of the analysis
            :param routing: The routing used in the sandbox setup (Spoofed, Internet, Tor, VPN)
            :param machine_metadata: The metadata of the machine used for the analysis
            """
            set_optional_argument(self, "start_time", start_time, str)
            # A falsy start time falls back to MIN_TIME
            if not self.start_time:
                self.start_time: str = MIN_TIME

            set_optional_argument(self, "task_id", task_id, int)

            set_optional_argument(self, "end_time", end_time, str)
            # A falsy end time falls back to MAX_TIME
            if not self.end_time:
                self.end_time: str = MAX_TIME

            set_optional_argument(self, "routing", routing, str)
            set_optional_argument(
                self, "machine_metadata", machine_metadata, self.MachineMetadata
            )

        def as_primitives(self) -> Dict[str, Any]:
            """
            Build the dictionary representation of this object; the machine metadata is
            converted through its own as_primitives()
            :return: The dictionary representation of the object
            """
            primitives: Dict[str, Any] = {}
            for attr_name, attr_value in self.__dict__.items():
                if isinstance(attr_value, self.MachineMetadata):
                    primitives[attr_name] = attr_value.as_primitives()
                else:
                    primitives[attr_name] = attr_value
            return primitives

        def load_from_json(self, json: Dict[str, Any]) -> None:
            """
            Set the attributes of this object from the given json
            :param json: The given json representation of the analysis metadata
            :return: None
            """
            for field in ("task_id", "start_time", "end_time", "routing"):
                setattr(self, field, json[field])
            # The machine metadata is always rebuilt from the json
            self.machine_metadata = self.MachineMetadata()
            self.machine_metadata.load_from_json(json["machine_metadata"])

    def __init__(
        self,
        objectid: ObjectID,
        analysis_metadata: AnalysisMetadata,
        sandbox_name: str,
        sandbox_version: Optional[str] = None,
    ) -> None:
        """
        The result ontology for sandbox output
        :param objectid: The object ID of the sandbox object
        :param analysis_metadata: Metadata for the analysis
        :param sandbox_name: The name of the sandbox
        :param sandbox_version: The version of the sandbox
        :return: None
        """
        set_required_argument(self, "objectid", objectid, ObjectID)
        set_required_argument(
            self, "analysis_metadata", analysis_metadata, self.AnalysisMetadata
        )
        set_required_argument(self, "sandbox_name", sandbox_name, str)
        set_optional_argument(self, "sandbox_version", sandbox_version, str)

    def update_analysis_metadata(self, **kwargs) -> None:
        """
        Update the analysis_metadata object attribute using the given keyword arguments
        :param kwargs: Key word arguments to be used for updating the analysis_metadata object attribute
        :return: None
        """
        update_object_items(self.analysis_metadata, kwargs)

    def update_machine_metadata(self, **kwargs) -> None:
        """
        Update the machine_metadata object attribute using the given keyword arguments.
        A machine metadata object is created first if the analysis metadata has none.
        :param kwargs: Key word arguments to be used for updating the machine_metadata object attribute
        :return: None
        """
        analysis_metadata = self.analysis_metadata
        if not analysis_metadata.machine_metadata:
            analysis_metadata.machine_metadata = (
                self.AnalysisMetadata.MachineMetadata()
            )
        update_object_items(analysis_metadata.machine_metadata, kwargs)

    def as_primitives(self) -> Dict[str, Any]:
        """
        Build the dictionary representation of this object
        :return: The dictionary representation of the object
        """
        primitives: Dict[str, Any] = {}
        primitives["objectid"] = self.objectid.as_primitives()
        primitives["analysis_metadata"] = self.analysis_metadata.as_primitives()
        primitives["sandbox_name"] = self.sandbox_name
        primitives["sandbox_version"] = self.sandbox_version
        return primitives
1366
-
1367
-
1368
- class OntologyResults:
1369
def __init__(self, service_name: Optional[str] = None) -> None:
    """
    Set up the OntologyResults object, which contains and manipulates all data
    relating to the ontology results. The given service name is also stored in the
    module-level SERVICE_NAME.
    :param service_name: The name of the service this ontology result is being generated for
    :return: None
    """
    # Publish the service name at module level before anything else is set up
    global SERVICE_NAME
    SERVICE_NAME = service_name

    # Network-related results
    self.netflows: List[NetworkConnection] = []
    self.dns_netflows: List[NetworkDNS] = []
    self.http_netflows: List[NetworkHTTP] = []
    # Process, sandbox and signature results
    self.processes: List[Process] = []
    self.sandboxes: List[Sandbox] = []
    self.signatures: List[Signature] = []
    # Processes indexed by a string key
    self._guid_process_map: Dict[str, Process] = {}
    # Same value that was just stored in SERVICE_NAME
    self.service_name = service_name
1387
-
1388
- # ObjectID manipulation methods
1389
@staticmethod
def create_objectid(**kwargs) -> ObjectID:
    """
    Create an ObjectID, assign its attributes based on the keyword arguments provided,
    and return it
    :param kwargs: Key word arguments to be used for updating the ObjectID's attributes
    :raises ValueError: If "tag" or "ontology_id" is missing/falsy, or if a "time_observed"
                        value is given that is not a str
    :return: ObjectID object
    """
    if not kwargs.get("tag") or not kwargs.get("ontology_id"):
        raise ValueError("The objectid needs its required arguments")
    new_objectid = ObjectID(
        kwargs["tag"], kwargs["ontology_id"], kwargs.get("service_name")
    )

    time_observed = kwargs.get("time_observed")
    if time_observed is not None:
        # A provided time_observed must be a str...
        if not isinstance(time_observed, str):
            raise ValueError("time_observed must be a str")
        # ...and it must parse with LOCAL_FMT; the parsed value is stored back as a str
        kwargs["time_observed"] = str(datetime.strptime(time_observed, LOCAL_FMT))

    update_object_items(new_objectid, kwargs)
    return new_objectid
1410
-
1411
@staticmethod
def create_session() -> str:
    """
    Create a random session ID. A session ID is a totally unique value, separate from
    the Sandbox Ontology ID.
    :return: The session ID
    """
    session_id = get_random_id()
    return session_id
1418
-
1419
- # Sandbox manipulation methods
1420
def set_sandboxes(self, sandboxes: List[Sandbox]) -> None:
    """
    Setter for the Sandbox objects. Anything other than a list made only of Sandbox
    objects results in an empty list being set.
    :param sandboxes: The sandboxes to set
    :return: None
    """
    if not isinstance(sandboxes, List):
        self.sandboxes = []
        return
    for candidate in sandboxes:
        if not isinstance(candidate, Sandbox):
            self.sandboxes = []
            return
    self.sandboxes = sandboxes
1432
-
1433
def add_sandbox(self, sandbox: Sandbox) -> None:
    """
    Append a Sandbox object to the list of sandboxes
    :param sandbox: The sandbox to add
    :return: None
    """
    known_sandboxes = self.sandboxes
    known_sandboxes.append(sandbox)
1440
-
1441
@staticmethod
def create_sandbox(**kwargs) -> Sandbox:
    """
    Create a Sandbox object, assign its attributes based on the keyword arguments provided,
    and return it
    :param kwargs: Key word arguments to be used for updating the Sandbox object's attributes
    :raises ValueError: If "objectid" or "sandbox_name" is missing/falsy
    :return: Sandbox object
    """
    if not kwargs.get("objectid") or not kwargs.get("sandbox_name"):
        raise ValueError("The sandbox needs its required arguments")

    # Start from empty analysis metadata; any provided metadata is applied below
    new_sandbox = Sandbox(
        kwargs["objectid"], Sandbox.AnalysisMetadata(), kwargs["sandbox_name"]
    )
    update_object_items(new_sandbox, kwargs)

    provided_metadata = kwargs.get("analysis_metadata")
    if provided_metadata:
        new_sandbox.update_analysis_metadata(**provided_metadata.as_primitives())
    return new_sandbox
1461
-
1462
def get_sandbox_by_session(self, session: str) -> Optional[Sandbox]:
    """
    Look up the first Sandbox object whose objectid session equals the given session
    :param session: The session that we are looking for sandboxes that match
    :return: A Sandbox object, if it exists
    """
    for candidate in self.sandboxes:
        if candidate.objectid.session == session:
            return candidate
    return None
1476
-
1477
def get_sandboxes(self) -> List[Sandbox]:
    """
    Getter for the sandboxes attribute
    :return: The value of the sandboxes attribute
    """
    current_sandboxes = self.sandboxes
    return current_sandboxes
1483
-
1484
- # Signature manipulation methods
1485
def set_signatures(self, signatures: List[Signature]) -> None:
    """
    Setter for the Signature objects. Anything other than a list made only of Signature
    objects results in an empty list being set.
    :param signatures: The signatures to set
    :return: None
    """
    if not isinstance(signatures, List):
        self.signatures = []
        return
    for candidate in signatures:
        if not isinstance(candidate, Signature):
            self.signatures = []
            return
    self.signatures = signatures
1497
-
1498
def create_signature(self, **kwargs) -> Signature:
    """
    This method creates a Signature object, assigns its attributes based on keyword arguments provided,
    and returns the Signature object
    :param kwargs: Key word arguments to be used for updating the Signature object's attributes.
                   A "description" given as a str is lower-cased before being applied
    :raises ValueError: If "objectid", "name" or "type" is missing/falsy
    :return: Signature object
    """
    if not (kwargs.get("objectid") and kwargs.get("name") and kwargs.get("type")):
        raise ValueError("The signature needs its required arguments")
    signature = Signature(kwargs["objectid"], kwargs["name"], kwargs["type"])
    # Only a str can be lower-cased; a description passed as None (or any other non-str)
    # is handed over untouched instead of raising an AttributeError here
    description = kwargs.get("description")
    if isinstance(description, str):
        kwargs["description"] = description.lower()
    update_object_items(signature, kwargs)
    return signature
1512
-
1513
def add_signature(self, signature: Signature) -> None:
    """
    Append a Signature object to the list of signatures
    :param signature: The Signature object to be added
    :return: None
    """
    known_signatures = self.signatures
    known_signatures.append(signature)
1520
-
1521
def get_signatures(self) -> List[Signature]:
    """
    Getter for the signatures attribute
    :return: The value of the signatures attribute
    """
    current_signatures = self.signatures
    return current_signatures
1527
-
1528
def get_signatures_by_pid(self, pid: int) -> List[Signature]:
    """
    Retrieve the signatures that match a certain process ID. A signature is added to the
    result once for every one of its attributes that matches.
    :param pid: The process ID
    :return: A list of signatures that match the process pid
    """
    matching_processes = [
        candidate for candidate in self.processes if candidate.pid == pid
    ]
    matches: List[Signature] = []
    for signature in self.signatures:
        for attribute in signature.attributes:
            source = attribute.source
            if source.guid:
                # The source has a GUID: compare it with the GUIDs of the processes
                is_linked = any(
                    source.guid == process.objectid.guid
                    for process in matching_processes
                )
            else:
                # No GUID on the source: compare ontology IDs instead
                is_linked = any(
                    source.ontology_id == process.objectid.ontology_id
                    for process in matching_processes
                )
            if is_linked:
                matches.append(signature)

    return matches
1553
-
1554
@staticmethod
def create_attribute(**kwargs) -> Attribute:
    """
    Create an Attribute by handing the keyword arguments provided over to
    Signature.create_attribute, and return what it returns
    :param kwargs: Key word arguments to be used for updating the Attribute's attributes
    :return: Attribute object
    """
    new_attribute = Signature.create_attribute(**kwargs)
    return new_attribute
1563
-
1564
- # NetworkConnection manipulation methods
1565
def set_netflows(self, network_connections: List[NetworkConnection]) -> None:
    """
    Setter for the NetworkConnection objects. Note that a netflow == NetworkConnection.
    Anything other than a list made only of NetworkConnection objects results in an
    empty list being set.
    :param network_connections: The NetworkConnections to set
    :return: None
    """
    if not isinstance(network_connections, List):
        self.netflows: List[NetworkConnection] = []
        return
    for candidate in network_connections:
        if not isinstance(candidate, NetworkConnection):
            self.netflows = []
            return
    self.netflows = network_connections
1580
-
1581
def create_network_connection(self, **kwargs) -> NetworkConnection:
    """
    Create a NetworkConnection object, assign its attributes based on the keyword arguments
    provided, and return it
    :param kwargs: Key word arguments to be used for updating the NetworkConnection object's attributes
    :raises ValueError: If any of the required arguments is missing/falsy
    :return: NetworkConnection object
    """
    # Listed in the order in which NetworkConnection receives them
    required_keys = (
        "objectid",
        "destination_ip",
        "destination_port",
        "transport_layer_protocol",
        "direction",
    )
    if not all(kwargs.get(required_key) for required_key in required_keys):
        raise ValueError("The network connection needs its required arguments")

    required_values = [kwargs[required_key] for required_key in required_keys]
    new_connection = NetworkConnection(*required_values)
    new_connection.update(**kwargs)
    return new_connection
1606
-
1607
def add_network_connection(self, network_connection: NetworkConnection) -> None:
    """
    Append a NetworkConnection object to the list of network connections. If the connection
    has a process, that process is first replaced by the one found for its GUID.
    :param network_connection: The NetworkConnection object to be added
    :return: None
    """
    # Check if network_connection.process needs linking
    process = network_connection.process
    if process:
        guid = process.objectid.guid
        if not guid:
            # No GUID on the process: look one up from its pid and start time
            guid = self.get_guid_by_pid_and_time(process.pid, process.start_time)
        # If we cannot link a process to this network connection, then don't include the process
        network_connection.set_process(self.get_process_by_guid(guid))

    self.netflows.append(network_connection)
1627
-
1628
def get_network_connections(self) -> List[NetworkConnection]:
    """
    Getter for the stored network connections (the netflows attribute)
    :return: The list of network connections
    """
    netflows = self.netflows
    return netflows
1634
-
1635
def get_network_connection_by_pid(self, pid: int) -> List[NetworkConnection]:
    """
    Collect the network connections whose linked process has the given process ID.
    Connections without a process (or whose process has no pid attribute) are compared as pid None.
    :param pid: The process ID
    :return: A list of network connections that match the process pid
    """
    matches = []
    for candidate in self.get_network_connections():
        owner_pid = getattr(candidate.process, "pid", None)
        if owner_pid == pid:
            matches.append(candidate)
    return matches
1646
-
1647
def get_network_connection_by_guid(
    self, guid: Optional[str]
) -> Optional[NetworkConnection]:
    """
    Look up the first network connection whose ObjectID GUID equals the given GUID
    :param guid: The given GUID that we want an associated network connection for
    :return: The associated network connection, or None when guid is None or nothing matches
    """
    if guid is None:
        return None

    for candidate in self.get_network_connections():
        if candidate.objectid.guid == guid:
            return candidate
    return None
1668
-
1669
def get_network_connection_by_details(
    self,
    destination_ip: str,
    destination_port: int,
    direction: str,
    transport_layer_protocol: str,
) -> Optional[NetworkConnection]:
    """
    This method finds an existing network connection based on specific details
    NOTE: This isn't going to be the most exact method ever since it does not account for source IPs and ports
    :param destination_ip: The destination IP of the network connection
    :param destination_port: The destination port of the network connection
    :param direction: The direction of the network connection
    :param transport_layer_protocol: The transport layer protocol of the connection
    :return: The first matching network connection, or None if any detail is None or nothing matches
    """
    # All or nothing!
    details = (destination_ip, destination_port, direction, transport_layer_protocol)
    if any(detail is None for detail in details):
        return None

    # Due to the way INetSim traffic can be handled, a lookup for port 80
    # also accepts connections recorded on port 443
    destination_ports = [80, 443] if destination_port == 80 else [destination_port]

    for network_connection in self.get_network_connections():
        if (
            network_connection.destination_ip == destination_ip
            and network_connection.destination_port in destination_ports
            and network_connection.direction == direction
            and network_connection.transport_layer_protocol == transport_layer_protocol
        ):
            return network_connection
    return None
1714
-
1715
- # NetworkDNS manipulation methods
1716
def set_dns_netflows(self, network_dns: List[NetworkDNS]) -> None:
    """
    Replace the stored DNS netflows. Note that a dns_netflow == NetworkDNS.
    The given value is kept only when it is a list made up exclusively of NetworkDNS objects;
    anything else resets the DNS netflows to a new empty list.
    :param network_dns: The NetworkDNS objects to set
    :return: None
    """
    accepted: List[NetworkDNS] = []
    if isinstance(network_dns, List):
        if all(isinstance(entry, NetworkDNS) for entry in network_dns):
            accepted = network_dns
    self.dns_netflows = accepted
1728
-
1729
def create_network_dns(self, **kwargs) -> NetworkDNS:
    """
    Build a NetworkDNS from keyword arguments and return it.
    domain and lookup_type must be truthy; resolved_ips only has to be present and not None
    (an empty list is accepted).
    :param kwargs: Key word arguments to be used for updating the NetworkDNS object's attributes
    :return: NetworkDNS object
    :raises ValueError: if a required argument is missing
    """
    domain = kwargs.get("domain")
    resolved_ips = kwargs.get("resolved_ips")
    lookup_type = kwargs.get("lookup_type")
    if not domain or resolved_ips is None or not lookup_type:
        raise ValueError("The network dns connection needs its required arguments")

    network_dns = NetworkDNS(domain, resolved_ips, lookup_type)
    update_object_items(network_dns, kwargs)
    return network_dns
1747
-
1748
def add_network_dns(self, dns: NetworkDNS) -> None:
    """
    Append a NetworkDNS object to the stored DNS netflows
    :param dns: The NetworkDNS object to be added
    :return: None
    """
    dns_calls = self.dns_netflows
    dns_calls.append(dns)
1755
-
1756
def get_network_dns(self) -> List[NetworkDNS]:
    """
    Getter for the stored DNS netflows
    :return: The list of network dns
    """
    dns_calls = self.dns_netflows
    return dns_calls
1762
-
1763
def get_domain_by_destination_ip(self, ip: str) -> Optional[str]:
    """
    Find the domain of the first DNS netflow whose resolved IPs contain the given IP
    :param ip: The IP for which an associated domain is requested
    :return: The domain associated with the given destination IP, or None if there is none
    """
    for dns in self.dns_netflows:
        if ip in dns.resolved_ips:
            return dns.domain
    return None
1774
-
1775
def get_destination_ip_by_domain(self, domain: str) -> Optional[str]:
    """
    This method returns a destination ip associated with a given domain.
    The first resolved IP of the first matching DNS netflow that has any resolved IPs is returned.
    :param domain: The domain for which an associated IP is requested
    :return: The IP associated with the given domain, or None if no matching entry has a resolved IP
    """
    for dns in self.dns_netflows:
        # An entry may legitimately carry an empty resolved_ips list (create_network_dns only
        # rejects None), so skip those rather than indexing into an empty list
        if domain == dns.domain and dns.resolved_ips:
            return dns.resolved_ips[0]
    return None
1786
-
1787
- # NetworkHTTP manipulation methods
1788
def set_http_netflows(self, network_http: List[NetworkHTTP]) -> None:
    """
    Replace the stored HTTP netflows. Note that a http_netflow == NetworkHTTP.
    The given value is kept only when it is a list made up exclusively of NetworkHTTP objects;
    anything else resets the HTTP netflows to a new empty list.
    :param network_http: The NetworkHTTPs to set
    :return: None
    """
    accepted: List[NetworkHTTP] = []
    if isinstance(network_http, List):
        if all(isinstance(entry, NetworkHTTP) for entry in network_http):
            accepted = network_http
    self.http_netflows = accepted
1800
-
1801
def create_network_http(self, **kwargs) -> NetworkHTTP:
    """
    Build a NetworkHTTP from keyword arguments and return it.
    request_uri and request_method must both be present with truthy values.
    :param kwargs: Key word arguments to be used for updating the NetworkHTTP object's attributes
    :return: NetworkHTTP object
    :raises ValueError: if a required argument is missing or falsy
    """
    request_uri = kwargs.get("request_uri")
    request_method = kwargs.get("request_method")
    if not request_uri or not request_method:
        raise ValueError("The network http connection needs its required arguments")

    network_http = NetworkHTTP(request_uri, request_method)
    update_object_items(network_http, kwargs)
    return network_http
1813
-
1814
def add_network_http(self, http: NetworkHTTP) -> None:
    """
    Append a NetworkHTTP object to the stored HTTP netflows
    :param http: The NetworkHTTP object to be added
    :return: None
    """
    http_calls = self.http_netflows
    http_calls.append(http)
1821
-
1822
def get_network_http(self) -> List[NetworkHTTP]:
    """
    Getter for the stored HTTP netflows
    :return: The list of network HTTP
    """
    http_calls = self.http_netflows
    return http_calls
1828
-
1829
def get_network_http_by_path(self, path: str) -> Optional[NetworkHTTP]:
    """
    Find the first network HTTP call whose response or request body path equals the given path
    :param path: The path to a response/request body file
    :return: The associated network HTTP call for the given path, or None if there is none
    """
    for http in self.get_network_http():
        if path in (http.response_body_path, http.request_body_path):
            return http
    return None
1844
-
1845
def get_network_http_by_details(
    self, request_uri: str, request_method: str, request_headers: Dict[str, str]
) -> Optional[NetworkHTTP]:
    """
    Find the first network HTTP call matching the request URI, request method and request headers
    :param request_uri: The URI of the request
    :param request_method: The request_method used for the HTTP request
    :param request_headers: The headers of the request
    :return: The network http call (should one exist) that matches these details
    """
    for http in self.get_network_http():
        if http.request_uri != request_uri:
            continue
        if http.request_method != request_method:
            continue
        if http.request_headers == request_headers:
            return http
    return None
1866
-
1867
def get_network_connection_by_network_http(self, network_http: NetworkHTTP) -> Optional[NetworkConnection]:
    """
    Find the first netflow whose http_details equals the given network http object
    :param network_http: The given network http object
    :return: The corresponding network connection, or None if there is none
    """
    return next(
        (netflow for netflow in self.netflows if netflow.http_details == network_http),
        None,
    )
1878
-
1879
- # Process manipulation methods
1880
def set_processes(self, processes: List[Process]) -> None:
    """
    Replace the stored processes.
    The given value is kept only when it is a list made up exclusively of Process objects;
    anything else resets the processes to a new empty list.
    :param processes: The Processes to set
    :return: None
    """
    accepted: List[Process] = []
    if isinstance(processes, List):
        if all(isinstance(entry, Process) for entry in processes):
            accepted = processes
    self.processes = accepted
1892
-
1893
def create_process(self, **kwargs) -> Process:
    """
    Build a Process from keyword arguments and return it.
    objectid, image and start_time must be present with truthy values. After the keyword
    arguments are applied, missing values are filled in: a GUID is assigned, the end time
    defaults to MAX_TIME and the ObjectID's time observed defaults to the start time.
    :param kwargs: Key word arguments to be used for updating the Process object's attributes
    :return: Process object
    :raises ValueError: if a required argument is missing or falsy
    """
    objectid = kwargs.get("objectid")
    image = kwargs.get("image")
    start_time = kwargs.get("start_time")
    if not (objectid and image and start_time):
        raise ValueError("The process needs its required arguments")

    process = Process(objectid, image, start_time)
    process.update(**kwargs)

    if not process.objectid.guid:
        process.objectid.assign_guid()
    if not process.end_time:
        process.set_end_time(MAX_TIME)
    if not process.objectid.time_observed:
        process.objectid.set_time_observed(process.start_time)
    return process
1914
-
1915
def add_process(self, process: Process) -> None:
    """
    Register a Process once it passes validation: index it in the GUID -> Process map
    (string GUIDs are upper-cased for the key), link it to its parent and children, and
    append it to the processes. Invalid processes are ignored with a debug log.
    :param process: The Process object to be added
    :return: None
    """
    if not self._validate_process(process):
        log.debug("Invalid process, ignoring...")
        return

    guid = process.objectid.guid
    map_key = guid.upper() if isinstance(guid, str) else guid
    self._guid_process_map[map_key] = process
    self.set_parent_details(process)
    self.set_child_details(process)
    self.processes.append(process)
1932
-
1933
def update_process(self, **kwargs) -> None:
    """
    This method updates a Process object attributes, or creates and adds the process when no
    existing one can be found.
    The target process is located by the "guid" kwarg when it is truthy, otherwise by the "pid"
    kwarg together with a timestamp ("end_time" if truthy, else "start_time").
    Keys prefixed with "p" (see parent_keys) describe the parent and are used at the end to
    link the updated process to an already-known parent.
    :param kwargs: Key word arguments to be used for updating the Process object's attributes
    :return: None
    :raises ValueError: if the process is looked up by PID and the timestamp is not a str
    """
    # Nothing to do when every supplied value is None (this includes no kwargs at all)
    if all(value is None for value in kwargs.values()):
        return

    if "guid" not in kwargs and "pid" not in kwargs:
        log.warning(
            "You must pass GUID kwarg or a PID kwarg if you want to update a process"
        )
        return
    elif (
        "guid" not in kwargs
        and "pid" in kwargs
        and not ("start_time" in kwargs or "end_time" in kwargs)
    ):
        log.warning(
            "You must pass GUID kwarg or a PID kwarg with a timestamp such as start_time or end_time if you want to update a process."
        )
        return

    # Don't update the parent yet
    parent_keys = [
        "pguid",
        "ptag",
        "ptreeid",
        "pprocesstree",
        "ptime_observed",
        "ppid",
        "pimage",
        "pcommand_line",
        "pobjectid",
    ]
    # The leading "p" is stripped from each key, e.g. "pguid" -> "guid", "pobjectid" -> "objectid"
    parent_kwargs = {
        key[1:]: value for key, value in kwargs.items() if key in parent_keys
    }

    if "guid" in kwargs and kwargs["guid"]:
        process_to_update = self.get_process_by_guid(kwargs["guid"])
        if not process_to_update:
            # Unknown GUID: treat the kwargs as a brand new process
            p = self.create_process(**kwargs)
            self.add_process(p)
            return
        process_to_update.update(**kwargs)
    else:
        # NOTE(review): this branch is also reached when "guid" is present but falsy; in that case
        # the guards above did not check for "pid"/"start_time", so the lookups below look like
        # they can raise KeyError - confirm against callers
        timestamp = (
            kwargs["end_time"] if kwargs.get("end_time") else kwargs["start_time"]
        )
        if not isinstance(timestamp, str):
            raise ValueError(f"The timestamp {timestamp} must be a str")

        guid = self.get_guid_by_pid_and_time(kwargs["pid"], timestamp)
        if not guid:
            # No process with this PID at that time: treat the kwargs as a brand new process
            p = self.create_process(**kwargs)
            self.add_process(p)
            return
        process_to_update = self.get_process_by_guid(guid)
        kwargs["guid"] = guid
        if process_to_update:
            process_to_update.update(**kwargs)

    # NOTE(review): parent_kwargs keys have had their "p" prefix stripped, so the "pobjectid"
    # lookups below appear never to find a value (the key would be "objectid") - confirm intent
    if parent_kwargs.get("guid") or parent_kwargs.get("pobjectid", {}).get("guid"):
        # Only update if ObjectID is not associated with another process
        if process_to_update and any(
            process_to_update.pobjectid == process.objectid
            for process in self.get_processes()
        ):
            return
        pguid = (
            parent_kwargs["guid"]
            if parent_kwargs.get("guid")
            else parent_kwargs.get("pobjectid", {}).get("guid")
        )
        parent = self.get_process_by_guid(pguid)
        # Link only when both the updated process and its parent are known
        if process_to_update and parent:
            process_to_update.set_parent(parent)
2012
-
2013
def update_objectid(self, **kwargs) -> None:
    """
    Update the ObjectID of the process, or failing that the network connection, that is
    identified by the "guid" kwarg. Nothing happens when every value is None, when no "guid"
    kwarg is given (a warning is logged) or when no object with that GUID is known.
    :param kwargs: Key word arguments to be used for updating the object's ObjectID attributes
    :return: None
    """
    if not any(value is not None for value in kwargs.values()):
        return

    if "guid" not in kwargs:
        log.warning(
            "You must pass GUID kwarg if you want to update a process ObjectID."
        )
        return

    guid = kwargs["guid"]
    target = self.get_process_by_guid(guid)
    if not target:
        # Not a process: fall back to the network connections
        target = self.get_network_connection_by_guid(guid)
    if not target:
        return

    update_object_items(target.objectid, kwargs)
2035
-
2036
def set_parent_details(self, process: Process) -> None:
    """
    Link the given process to its parent process.
    The parent is first looked up through the process's parent ObjectID GUID; when that yields
    nothing, it is looked up through the parent PID and the process's start time.
    :param process: The process that will have its parent's details set
    :return: None
    """
    parent = None
    parent_ref = process.pobjectid
    if parent_ref and parent_ref.guid:
        parent = self.get_process_by_guid(parent_ref.guid)
        process.set_parent(parent)

    if parent:
        return

    if process.ppid and process.start_time:
        fallback_guid = self.get_guid_by_pid_and_time(process.ppid, process.start_time)
        parent = self.get_process_by_guid(fallback_guid)
        process.set_parent(parent)
2053
-
2054
def set_child_details(self, process: Process) -> None:
    """
    Set the given process as the parent of every known child process, where children are found
    both by parent GUID and by parent PID + start time.
    :param process: The parent process that will be set as the parent for any associated child processes
    :return: None
    """
    own_guid = process.objectid.guid
    if own_guid:
        for child in self.get_processes_by_pguid(own_guid):
            child.set_parent(process)

    # Processes may not have a pguid attribute set, so the PID lookup runs in addition to,
    # not instead of, the GUID lookup
    if process.pid and process.start_time:
        for child in self.get_processes_by_ppid_and_time(process.pid, process.start_time):
            child.set_parent(process)
2071
-
2072
def get_processes(self) -> List[Process]:
    """
    Getter for the processes attribute
    :return: The value of the processes attribute
    """
    known_processes = self.processes
    return known_processes
2078
-
2079
def get_guid_by_pid_and_time(self, pid: int, timestamp: str) -> Optional[str]:
    """
    Resolve the GUID of the process that had the given process ID at the given time
    :param pid: The process ID
    :param timestamp: A timestamp between the creation and termination of a process
    :return: The GUID for the given process ID, or None when no process is found
    """
    match = self.get_process_by_pid_and_time(pid, timestamp)
    if not match:
        return None
    return match.objectid.guid
2091
-
2092
def get_processes_by_ppid_and_time(
    self, ppid: int, timestamp: str
) -> List[Process]:
    """
    This method allows the retrieval of processes based on a parent process ID and timestamp
    :param ppid: The parent process ID
    :param timestamp: A timestamp between the creation and termination of a process
    :return: The child processes associated for the given parent process ID; an empty list
             when timestamp is None or nothing matches
    """
    if timestamp is None:
        # Honour the List return type so callers can always iterate the result
        return []
    return [
        process
        for process in self.get_processes()
        if process.ppid == ppid
        and timestamp <= process.end_time
        and timestamp >= process.start_time
    ]
2110
-
2111
def get_pguid_by_pid_and_time(self, pid: int, timestamp: str) -> Optional[str]:
    """
    Resolve the parent GUID of the process that had the given process ID at the given time
    :param pid: The process ID
    :param timestamp: A timestamp between the creation and termination of a process
    :return: The parent process's GUID, or None when the process or its parent ObjectID is missing
    """
    match = self.get_process_by_pid_and_time(pid, timestamp)
    if not match or not match.pobjectid:
        return None
    return match.pobjectid.guid
2123
-
2124
def is_guid_in_gpm(self, guid: str) -> bool:
    """
    Check whether a GUID is in the GUID -> Process map.
    The GUID is parsed with UUID and normalised to the upper-case, brace-wrapped form before
    the lookup, so an unparsable GUID raises whatever UUID raises.
    :param guid: The GUID to look for
    :return: A boolean indicating if a GUID is in the GUID -> Process map
    """
    canonical = str(UUID(guid)).upper()
    wrapped = "{" + canonical + "}"
    return wrapped in self._get_guids()
2130
-
2131
def get_process_by_guid(self, guid: Optional[str]) -> Optional[Process]:
    """
    Look up a process in the GUID -> Process map, using the upper-cased GUID as the key
    :param guid: The given GUID that we want an associated process for
    :return: The associated process, or None when guid is None or unknown
    """
    if guid is not None:
        lookup_key = guid.upper()
        return self._guid_process_map.get(lookup_key)
    return None
2140
-
2141
def get_process_by_command_line(
    self, command_line: Optional[str] = None
) -> Optional[Process]:
    """
    Find the first process whose command line equals or contains the given command line.
    NOTE That this method has a high possibility of not being accurate. If multiple processes use the same
    command line, this method will return the first process.
    :param command_line: The given command line that we want an associated process for
    :return: The associated process, or None when command_line is empty or nothing matches
    """
    if not command_line:
        return None

    for process in self.get_processes():
        existing = process.command_line
        if not existing:
            continue
        if command_line == existing or command_line in existing:
            return process
    return None
2167
-
2168
def get_process_by_pid_and_time(
    self, pid: Optional[int], timestamp: Optional[str]
) -> Optional[Process]:
    """
    Find the single process that has the given process ID and whose start/end times enclose
    the given timestamp (both bounds inclusive). When more than one process qualifies, a
    warning is logged and nothing is returned.
    :param pid: The process ID
    :param timestamp: A timestamp between the creation and termination of a process
    :return: The process for the given process ID, or None
    """
    if pid is None or timestamp is None:
        return None

    matches: List[Process] = []
    for process in self.get_processes():
        if (
            process.pid == pid
            and timestamp <= process.end_time
            and timestamp >= process.start_time
        ):
            matches.append(process)

    if len(matches) == 1:
        return matches[0]
    if matches:
        # Several processes claim the same PID at the same time
        log.warning("Map is invalid")
    return None
2193
-
2194
def get_processes_by_pguid(self, pguid: Optional[str]) -> List[Process]:
    """
    Collect the processes whose parent ObjectID carries the given GUID
    :param pguid: The given parent process GUID that we want the child processes for
    :return: The child processes (an empty list when pguid is None)
    """
    children: List[Process] = []
    if pguid is None:
        return children
    for process in self.get_processes():
        parent_ref = process.pobjectid
        if parent_ref and parent_ref.guid == pguid:
            children.append(process)
    return children
2207
-
2208
def get_process_by_pid(self, pid: Optional[int] = None) -> Optional[Process]:
    """
    Find the first process with the given process ID.
    NOTE That this method has a high possibility of not being accurate. If multiple processes use the same
    process ID, this method will return the first process.
    :param pid: The given process ID that we want an associated process for
    :return: The associated process, or None when pid is falsy or nothing matches
    """
    if not pid:
        return None

    for process in self.get_processes():
        if process.pid and pid == process.pid:
            return process
    return None
2228
-
2229
def as_primitives(self) -> Dict[str, Any]:
    """
    Build the dictionary representation of the object by calling as_primitives on every
    sandbox, signature, netflow, DNS netflow, HTTP netflow and process
    :return: The dictionary representation of the object
    """
    sources = (
        ("sandboxes", self.sandboxes),
        ("signatures", self.signatures),
        ("network_connections", self.netflows),
        ("network_dns", self.dns_netflows),
        ("network_http", self.http_netflows),
        ("processes", self.processes),
    )
    primitives: Dict[str, Any] = {}
    for key, items in sources:
        primitives[key] = [item.as_primitives() for item in items]
    return primitives
2249
-
2250
- # Process Tree and Event manipulation methods
2251
def get_events(
    self, safelist: List[str] = None
) -> List[Union[Process, NetworkConnection]]:
    """
    Gather the process and network events that are not safelisted and hand them to the
    time-observed sorter. Processes need a start time and network connections need a time
    observed to be included; processes are placed before network connections prior to sorting.
    :param safelist: A list of safe treeids
    :return: A sorted list of all process and network events
    """
    safe_treeids: List[str] = [] if safelist is None else safelist

    events = []
    for process in self.processes:
        if process.start_time is not None and process.objectid.treeid not in safe_treeids:
            events.append(process)
    for network_connection in self.netflows:
        objectid = network_connection.objectid
        if objectid.time_observed is not None and objectid.treeid not in safe_treeids:
            events.append(network_connection)
    return self._sort_things_by_time_observed(events)
2276
-
2277
def get_non_safelisted_processes(self, safelist: List[str]) -> List[Process]:
    """
    Drop the processes whose tree ID is safelisted and return the rest sorted by time observed.
    NOTE: This method must be called once tree IDs have been assigned to the processes, most
    likely through calculating the process tree.
    :param safelist: All of the safe leaf tree IDs (the safelist)
    :return: A list of non-safelisted process
    """
    remaining = []
    for process in self.get_processes():
        if process.objectid.treeid in safelist:
            continue
        remaining.append(process)
    return self._sort_things_by_time_observed(remaining)
2294
-
2295
def get_process_tree(self, safelist: List[str] = None) -> List[Dict[str, Any]]:
    """
    Generate the event tree: all events are converted to a dictionary, then to a tree, tree IDs
    are created on that tree, and finally the tree is filtered against the safelist when one
    is given.
    :param safelist: A list of safe tree IDs to filter the tree against
    :return: The event tree
    """
    safe_treeids: List[str] = [] if safelist is None else safelist

    events_dict = self._convert_events_to_dict(self.get_events())
    tree = self._convert_events_dict_to_tree(events_dict)
    self._create_treeids(tree)
    if not safe_treeids:
        return tree
    return OntologyResults._filter_event_tree_against_safe_treeids(tree, safe_treeids)
2311
-
2312
def get_process_tree_result_section(
    self, safelist: List[str] = None
) -> ResultProcessTreeSection:
    """
    Create the Typed ResultSection for Process (Event) Trees, titled "Spawned Process Tree".
    Top-level events that contain a "process" key are treated as network connections and are
    not turned into process items.
    :param safelist: A safelist of tree IDs that is to be applied to the events
    :return: The Typed ResultSection for the Process (Event) Tree
    """
    safe_treeids: List[str] = [] if safelist is None else safelist

    tree = self.get_process_tree(safe_treeids)
    items: List[ProcessItem] = []
    section = ResultProcessTreeSection("Spawned Process Tree")

    # A "process" key is the telltale sign of a NetworkConnection, which is skipped here
    process_events = (event for event in tree if "process" not in event)
    for event in process_events:
        self._convert_event_tree_to_result_section(items, event, safe_treeids, section)

    for item in items:
        section.add_process(item)
    return section
2336
-
2337
def load_from_json(self, json: Dict[str, Any]) -> None:
    """
    This method takes a given json and sets the corresponding attributes to those values.
    The json must contain the keys "analysis_metadata", "signatures", "network_connections",
    "network_dns", "network_http", "processes", "sandbox_name" and "sandbox_version".
    NOTE(review): the attributes used here (analysis_metadata, network_connections, network_dns,
    network_http, sandbox_name, sandbox_version) differ from the ones the other methods of this
    class use (netflows, dns_netflows, http_netflows, sandboxes) - confirm they exist on the
    object before relying on this method.
    :param json: The the given json representation of the sandbox ontology
    :return: None
    """
    self.analysis_metadata.load_from_json(json["analysis_metadata"])
    # Each entry is converted by its matching _load_*_from_json helper and appended
    for signature in json["signatures"]:
        self.signatures.append(self._load_signature_from_json(signature))
    for network_connection in json["network_connections"]:
        self.network_connections.append(
            self._load_network_connection_from_json(network_connection)
        )
    for dns in json["network_dns"]:
        self.network_dns.append(self._load_network_dns_from_json(dns))
    for http in json["network_http"]:
        self.network_http.append(self._load_network_http_from_json(http))
    # NOTE(review): processes are appended directly rather than through add_process, so the
    # GUID -> Process map is not updated here - confirm this is intended
    for process in json["processes"]:
        self.processes.append(self._load_process_from_json(process))
    self.sandbox_name = json["sandbox_name"]
    self.sandbox_version = json["sandbox_version"]
2358
-
2359
@staticmethod
def handle_artifacts(
    artifact_list: List[Dict[str, Any]],
    request: ServiceRequest,
    collapsed: bool = False,
    injection_heur_id: int = 17,
) -> ResultSection:
    """
    Goes through each validated artifact in artifact_list, adding result sections and attaching
    the artifact to the request as an extracted or supplementary file. An artifact is only
    attached when no previously attached file of the same kind has the same sha256.
    :param artifact_list: List of dictionaries that each represent an artifact
    :param request: The service request that the artifacts are attached to
    :param collapsed: A flag used for indicating if the Sandbox Artifacts ResultSection should be collapsed or not
    :param injection_heur_id: The heuristic ID for the Injection heuristic of a service
    :return: A ResultSection containing any Artifact ResultSections, or None if it has no subsections
    """
    validated_artifacts = OntologyResults._validate_artifacts(artifact_list)
    artifacts_result_section = ResultSection("Sandbox Artifacts", auto_collapse=collapsed)

    for artifact in validated_artifacts:
        OntologyResults._handle_artifact(artifact, artifacts_result_section, injection_heur_id)

        if artifact.to_be_extracted:
            already_extracted = any(
                artifact.sha256 == previously_extracted["sha256"]
                for previously_extracted in request.extracted
            )
            if not already_extracted:
                try:
                    request.add_extracted(artifact.path, artifact.name, artifact.description)
                except MaxExtractedExceeded:
                    # To avoid errors from being raised when too many files have been extracted
                    pass
        else:
            already_supplemented = any(
                artifact.sha256 == previously_supplemented["sha256"]
                for previously_supplemented in request.task.supplementary
            )
            if not already_supplemented:
                request.add_supplementary(artifact.path, artifact.name, artifact.description)

    if artifacts_result_section.subsections:
        return artifacts_result_section
    return None
2401
-
2402
def _get_guids(self) -> List[str]:
    """
    List the keys (GUIDs) of the GUID -> Process map
    :return: A list of GUIDs
    """
    return [guid for guid in self._guid_process_map]
2408
-
2409
def _validate_process(self, process: Process) -> bool:
    """
    Decide whether a Process may be added, based on the GUIDs and PIDs already in the
    GUID -> Process map:
    - no GUID and no PID: invalid
    - GUID already in the map: invalid (duplicate), whatever the PID
    - new GUID but known PID: delegated to _handle_pid_match
    - otherwise: valid
    :param process: A Process object to be validated
    :return: A boolean flag indicating that Process is valid
    """
    known_guids: List[str] = list(self._guid_process_map.keys())
    known_pids: List[int] = [
        known.pid for known in self._guid_process_map.values() if known.pid is not None
    ]

    guid = process.objectid.guid
    if guid is None and process.pid is None:
        log.warning("Process requires at least a GUID or a PID, skipping...")
        return False

    if guid in known_guids:
        # We cannot have two items in the table that share GUIDs
        log.debug("Duplicate process, skipping...")
        return False

    if process.pid in known_pids:
        # Sharing a PID without sharing a GUID is allowed, subject to further validation
        return self._handle_pid_match(process)

    # A unique process that is not yet in the lookup table
    return True
2448
-
2449
- def _handle_pid_match(self, process: Process) -> bool:
2450
- """
2451
- This method is a deeper step in process validation for processes that share IDs
2452
- :param process: A Process object that shares an ID with another Process object in the lookup table
2453
- :return: A boolean indicating if process is a valid entry
2454
- """
2455
- valid_entry = False
2456
- # We only care about processes that share process IDs
2457
- processes_with_common_pids = [
2458
- validated_process
2459
- for validated_process in self.processes
2460
- if validated_process.pid == process.pid
2461
- ]
2462
-
2463
- if not processes_with_common_pids:
2464
- return True
2465
-
2466
- for process_with_common_pid in processes_with_common_pids:
2467
- if (
2468
- process_with_common_pid.start_time == process.start_time
2469
- and process_with_common_pid.end_time == process.end_time
2470
- ):
2471
- # We cannot have multiple processes that share IDs that took place at the same time
2472
- continue
2473
- elif (
2474
- process.start_time >= process_with_common_pid.end_time
2475
- or process.end_time <= process_with_common_pid.start_time
2476
- ):
2477
- # We can only have multiple processes that share IDs if they did not take place at the same time
2478
- valid_entry = True
2479
- else:
2480
- # We cannot have multiple processes that share IDs that have overlapping time ranges
2481
- continue
2482
- return valid_entry
2483
-
2484
- def _remove_process(self, process: Process) -> None:
2485
- """
2486
- This method takes a process and removes it from the current processes, if it exists
2487
- :param process: The process to be removed
2488
- :return: None
2489
- """
2490
- try:
2491
- self.processes.remove(process)
2492
- except ValueError:
2493
- return
2494
-
2495
- def _remove_network_http(self, network_http: NetworkHTTP) -> None:
2496
- """
2497
- This method takes a network_http and removes it from the current network_http calls, if it exists
2498
- :param network_http: The network_http to be removed
2499
- :return: None
2500
- """
2501
- try:
2502
- self.http_netflows.remove(network_http)
2503
- except ValueError:
2504
- return
2505
-
2506
- def _remove_network_dns(self, network_dns: NetworkDNS) -> None:
2507
- """
2508
- This method takes a network_dns and removes it from the current network_dns calls, if it exists
2509
- :param network_dns: The network_dns to be removed
2510
- :return: None
2511
- """
2512
- try:
2513
- self.dns_netflows.remove(network_dns)
2514
- except ValueError:
2515
- return
2516
-
2517
- def _remove_network_connection(self, network_connection: NetworkConnection) -> None:
2518
- """
2519
- This method takes a network_connection and removes it from the current network_connections, if it exists
2520
- :param network_connection: The network_connection to be removed
2521
- :return: None
2522
- """
2523
- try:
2524
- self.netflows.remove(network_connection)
2525
- except ValueError:
2526
- return
2527
-
2528
- def _remove_signature(self, signature: Signature) -> None:
2529
- """
2530
- This method takes a signature and removes it from the current signatures, if it exists
2531
- :param signature: The signature to be removed
2532
- :return: None
2533
- """
2534
- try:
2535
- self.signatures.remove(signature)
2536
- except ValueError:
2537
- return
2538
-
2539
- def _load_process_from_json(self, json: Dict[str, Any]) -> Process:
2540
- """
2541
- This method takes a given json and sets the corresponding attributes to those values
2542
- :param json: The the given json representation of the process
2543
- :return: A process object
2544
- """
2545
- process = self.create_process(**json)
2546
- return process
2547
-
2548
- def _load_signature_from_json(self, json: Dict[str, Any]) -> Signature:
2549
- """
2550
- This method takes a given json and sets the corresponding attributes to those values
2551
- :param json: The the given json representation of the signature
2552
- :return: A signature object
2553
- """
2554
- process = json.pop("process")
2555
- subjects = json.pop("subjects")
2556
- signature = self.create_signature(**json)
2557
- if process:
2558
- signature.update_process(**process)
2559
- if subjects:
2560
- for subject in subjects:
2561
- subject_process = subject.pop("process")
2562
- if subject_process:
2563
- signature.add_process_subject(**subject_process)
2564
- else:
2565
- signature.add_subject(**subject)
2566
- return signature
2567
-
2568
- def _load_network_connection_from_json(
2569
- self, json: Dict[str, Any]
2570
- ) -> NetworkConnection:
2571
- """
2572
- This method takes a given json and sets the corresponding attributes to those values
2573
- :param json: The the given json representation of the network connection
2574
- :return: A network connection object
2575
- """
2576
- process = json.pop("process")
2577
- network_connection = self.create_network_connection(**json)
2578
- if process:
2579
- network_connection.update_process(**process)
2580
- return network_connection
2581
-
2582
- def _load_network_dns_from_json(self, json: Dict[str, Any]) -> NetworkDNS:
2583
- """
2584
- This method takes a given json and sets the corresponding attributes to those values
2585
- :param json: The the given json representation of the network dns
2586
- :return: A network dns object
2587
- """
2588
- connection_details = json.pop("connection_details")
2589
- network_dns = self.create_network_dns(**json)
2590
- if connection_details:
2591
- process = connection_details.pop("process")
2592
- network_dns.connection_details.update(**connection_details)
2593
- if process:
2594
- network_dns.update_process(**process)
2595
- return network_dns
2596
-
2597
- def _load_network_http_from_json(self, json: Dict[str, Any]) -> NetworkHTTP:
2598
- """
2599
- This method takes a given json and sets the corresponding attributes to those values
2600
- :param json: The the given json representation of the network http
2601
- :return: A network http object
2602
- """
2603
- connection_details = json.pop("connection_details")
2604
- network_http = self.create_network_http(**json)
2605
- if connection_details:
2606
- process = connection_details.pop("process")
2607
- network_http.connection_details.update(**connection_details)
2608
- if process:
2609
- network_http.update_process(**process)
2610
- return network_http
2611
-
2612
- @staticmethod
2613
- def _sort_things_by_time_observed(
2614
- things_to_sort_by_time_observed: List[Union[Process, NetworkConnection, Dict]]
2615
- ) -> List[Any]:
2616
- """
2617
- This method sorts a list of things by their time_observeds
2618
- :param things_to_sort_by_time_observed: A list of things to sort by time_observed
2619
- :return: A list of things that have been sorted by time_observed
2620
- """
2621
- if not things_to_sort_by_time_observed:
2622
- return []
2623
-
2624
- # If every item is a dictionary, then use key lookups
2625
- if all(
2626
- isinstance(thing_to_sort_by_time_observed, Dict)
2627
- for thing_to_sort_by_time_observed in things_to_sort_by_time_observed
2628
- ):
2629
-
2630
- if any(
2631
- thing_to_sort_by_time_observed["objectid"]["time_observed"] is None
2632
- for thing_to_sort_by_time_observed in things_to_sort_by_time_observed
2633
- ):
2634
- log.warning("All ObjectID time_observed values must not be None...")
2635
- return things_to_sort_by_time_observed
2636
-
2637
- def time_observed(x):
2638
- # We should only be sorting with floats
2639
- time_obs = x["objectid"]["time_observed"]
2640
- if isinstance(time_obs, str):
2641
- if time_obs == MIN_TIME:
2642
- time_obs = epoch_to_local(0)
2643
- time_obs = datetime.strptime(
2644
- time_obs, LOCAL_FMT
2645
- ).timestamp()
2646
- return time_obs
2647
-
2648
- else:
2649
-
2650
- if any(
2651
- thing_to_sort_by_time_observed.objectid.time_observed is None
2652
- for thing_to_sort_by_time_observed in things_to_sort_by_time_observed
2653
- ):
2654
- log.warning("All ObjectID time_observed values must not be None...")
2655
- return things_to_sort_by_time_observed
2656
-
2657
- def time_observed(x):
2658
- # We should only be sorting with floats
2659
- time_obs = x.objectid.time_observed
2660
- if isinstance(time_obs, str):
2661
- if time_obs == MIN_TIME:
2662
- time_obs = epoch_to_local(0)
2663
- time_obs = datetime.strptime(
2664
- time_obs, LOCAL_FMT
2665
- ).timestamp()
2666
- return time_obs
2667
-
2668
- sorted_things = sorted(things_to_sort_by_time_observed, key=time_observed)
2669
- return sorted_things
2670
-
2671
- @staticmethod
2672
- def _sort_things_by_relationship(
2673
- things_to_sort_by_relationship: List[Union[Process, NetworkConnection, Dict]]
2674
- ) -> List[Union[Process, NetworkConnection, Dict]]:
2675
- """
2676
- This method sorts a list of things by their relationships
2677
- :param things_to_sort_by_relationship: A list of things to sort by their relationships to one another
2678
- :return: A list of things that have been sorted by their relationships
2679
- """
2680
- if not things_to_sort_by_relationship:
2681
- return []
2682
-
2683
- recurse_again = False
2684
- # If every item is a dictionary, then use key lookups
2685
- if all(
2686
- isinstance(thing_to_sort, Dict)
2687
- for thing_to_sort in things_to_sort_by_relationship
2688
- ):
2689
- for index, thing in enumerate(things_to_sort_by_relationship[:]):
2690
- # Confirm if we are working with an process or a network
2691
- if "pobjectid" in thing:
2692
- # This is a Process
2693
- pobjectid = thing["pobjectid"]
2694
- elif "process" in thing and thing["process"]:
2695
- # This is a NetworkConnection
2696
- pobjectid = thing["process"]["objectid"]
2697
- else:
2698
- pobjectid = None
2699
-
2700
- if not pobjectid:
2701
- continue
2702
- # We only want to sort if the thing has the same time observed as its parent
2703
- if thing["objectid"]["time_observed"] != pobjectid["time_observed"]:
2704
- continue
2705
-
2706
- # If the parent object exists in the rest of the list
2707
- for parent_index, parent in enumerate(
2708
- things_to_sort_by_relationship[index + 1 :]
2709
- ):
2710
- if (
2711
- pobjectid["guid"] == parent["objectid"]["guid"]
2712
- and pobjectid["time_observed"]
2713
- == parent["objectid"]["time_observed"]
2714
- ):
2715
- popped_item = things_to_sort_by_relationship.pop(
2716
- index + 1 + parent_index
2717
- )
2718
- things_to_sort_by_relationship.insert(index, popped_item)
2719
- recurse_again = True
2720
- break
2721
- if recurse_again:
2722
- break
2723
- else:
2724
- for index, thing in enumerate(things_to_sort_by_relationship[:]):
2725
- # Confirm if we are working with an process or a network
2726
- if hasattr(thing, "pobjectid"):
2727
- # This is a Process
2728
- pobjectid = thing.pobjectid
2729
- elif hasattr(thing, "process") and thing.process:
2730
- # This is a NetworkConnection
2731
- pobjectid = thing.process.objectid
2732
- else:
2733
- pobjectid = None
2734
-
2735
- if not pobjectid:
2736
- continue
2737
- # We only want to sort if the thing has the same time observed as its parent
2738
- if thing.objectid.time_observed != thing.pobjectid.time_observed:
2739
- continue
2740
- # If the parent object exists in the rest of the list
2741
- for parent_index, parent in enumerate(
2742
- things_to_sort_by_relationship[index + 1 :]
2743
- ):
2744
- if thing.pobjectid.guid == parent.objectid.guid:
2745
- popped_item = things_to_sort_by_relationship.pop(
2746
- index + 1 + parent_index
2747
- )
2748
- things_to_sort_by_relationship.insert(index, popped_item)
2749
- recurse_again = True
2750
- break
2751
- if recurse_again:
2752
- break
2753
-
2754
- if recurse_again:
2755
- OntologyResults._sort_things_by_relationship(things_to_sort_by_relationship)
2756
- return things_to_sort_by_relationship
2757
-
2758
- @staticmethod
2759
- def _convert_events_to_dict(
2760
- events: List[Union[Process, NetworkConnection]]
2761
- ) -> Dict[str, Any]:
2762
- """
2763
- This method converts events to dictionaries
2764
- :param events: A list of validated event objects
2765
- :return: A dictionary representing the event objects
2766
- """
2767
- events_dict = {}
2768
-
2769
- if any([event.objectid.guid is None for event in events]):
2770
- log.warning("All events must have a GUID at the ObjectID level...")
2771
- return events_dict
2772
-
2773
- for event in events:
2774
- events_dict[event.objectid.guid] = event.as_primitives()
2775
-
2776
- return events_dict
2777
-
2778
- @staticmethod
2779
- def _depth(d: Dict[str, Any]) -> int:
2780
- """
2781
- This method uses recursion to determine the depth of a dictionary
2782
- :param d: The dictionary to determine the depth of
2783
- :return: The integer value representing the current depth at the current iteration
2784
- """
2785
- if isinstance(d, dict):
2786
- children = d.get("children", [])
2787
- if isinstance(children, list):
2788
- if not children:
2789
- return 1
2790
- return 1 + max(OntologyResults._depth(child) for child in children)
2791
- return 0
2792
-
2793
- @staticmethod
2794
- def _convert_events_dict_to_tree(
2795
- events_dict: Dict[str, Any] = None
2796
- ) -> List[Dict[str, Any]]:
2797
- """
2798
- This method converts a dictionary representing events into a tree by using pid/ppid or guid/pguid
2799
- pairs for linking
2800
- :param events_dict: A dictionary of events
2801
- :return: A list of event tree roots, each which their respective branches and leaves
2802
- """
2803
-
2804
- root = {
2805
- "children": [],
2806
- }
2807
- sorted_events = OntologyResults._sort_things_by_time_observed(
2808
- list(events_dict.values())
2809
- )
2810
- try:
2811
- # If events all have the same time observed, but there are child-parent relationships between events,
2812
- # we should order based on relationship
2813
- sorted_events_by_relationship_and_time = (
2814
- OntologyResults._sort_things_by_relationship(sorted_events)
2815
- )
2816
- except RecursionError:
2817
- log.error("Unable to sort events by relationship due to recursion error.")
2818
- sorted_events_by_relationship_and_time = sorted_events
2819
-
2820
- events_seen = []
2821
-
2822
- for e in sorted_events_by_relationship_and_time:
2823
- if "children" not in e:
2824
- e["children"] = []
2825
-
2826
- # This the main difference between Process and NetworkConnection
2827
- pguid = None
2828
- if "pobjectid" in e and e["pobjectid"] and e["pobjectid"]["guid"]:
2829
- # This is a Process
2830
- pguid = e["pobjectid"]["guid"]
2831
- elif "process" in e and e["process"] and e["process"]["objectid"]["guid"]:
2832
- # This is a NetworkConnection
2833
- pguid = e["process"]["objectid"]["guid"]
2834
-
2835
- if pguid and pguid in events_seen:
2836
- # Check if depth is too DEEP
2837
- if any(OntologyResults._depth(event_dict) >= PROCESS_TREE_DEPTH_LIMIT for event_dict in events_dict.values()):
2838
- # We still want to register the process in events_seen, so
2839
- # that they don't get added to the root children
2840
- pass
2841
- else:
2842
- events_dict[pguid]["children"].append(e)
2843
- else:
2844
- root["children"].append(e)
2845
-
2846
- events_seen.append(e["objectid"]["guid"])
2847
-
2848
- return OntologyResults._sort_things_by_time_observed(root["children"])
2849
-
2850
def _convert_event_tree_to_result_section(
    self,
    items: List[ProcessItem],
    event: Dict[str, Any],
    safelist: List[str],
    result_section: ResultProcessTreeSection,
    parent: Optional[ProcessItem] = None,
) -> None:
    """
    Recursively turn one node of the event tree into a ProcessItem
    :param items: The list that collects the ProcessItem of every root event
    :param event: A dictionary representing the Process to be converted; its "children" list is
                  emptied as the children are handled
    :param safelist: A safelist of tree IDs that is to be applied to the events
    :param result_section: The Typed ResultSection for the Process (Event) Tree, which receives
                           the tags of events that are not safelisted
    :param parent: The ProcessItem of the parent of the event to be converted, if any
    :return: None
    """
    e = ProcessItem(
        pid=event["pid"],
        name=event["image"],
        cmd=event["command_line"],
    )
    network_events = self.get_network_connection_by_pid(e.pid)
    e.add_network_events(len(network_events))
    # TODO
    # e.add_file_events(len(self.get_file_events_by_pid(e.pid)))
    # e.add_registry_events(len(self.get_registry_events_by_pid(e.pid)))

    objectid = event["objectid"]
    if objectid["treeid"] in safelist:
        e.safelist()
    else:
        # Only events that are not safelisted get tagged
        result_section.add_tag("dynamic.processtree_id", objectid["processtree"])
        command_line = event["command_line"]
        if command_line:
            result_section.add_tag("dynamic.process.command_line", command_line)

    for signature in self.get_signatures_by_pid(event["pid"]):
        if signature.score is None:
            signature.set_score(0)
        e.add_signature(signature.name, signature.score)

    # Walk a copy, since every child is removed from the live list once handled
    for child in list(event["children"]):
        # A "process" key is a telltale sign that the child is a NetworkConnection. We don't
        # want those in the process tree result section, only their counts.
        if "process" not in child:
            self._convert_event_tree_to_result_section(
                items, child, safelist, result_section, parent=e
            )
        event["children"].remove(child)

    if not event["children"]:
        if not parent:
            items.append(e)
        else:
            parent.add_child_process(e)
2908
-
2909
- def _create_hashed_node(
2910
- self, parent_treeid: str, parent_processtree: str, node: Dict[str, Any]
2911
- ) -> None:
2912
- """
2913
- This method takes a single node and hashes node attributes.
2914
- Recurses through children to do the same.
2915
- :param parent_treeid: A string representing the tree id
2916
- :param parent_processtree: A string representing the rich id
2917
- :param node: A dictionary representing the node to hash
2918
- :return: None
2919
- """
2920
- children = node["children"]
2921
-
2922
- tag = node["objectid"].get("tag", "notag")
2923
- value_to_create_hash_from = (parent_treeid + tag).encode()
2924
- sha256sum = sha256(value_to_create_hash_from).hexdigest()
2925
- node["objectid"]["treeid"] = sha256sum
2926
-
2927
- if parent_processtree:
2928
- processtree = f"{parent_processtree}|{tag}"
2929
- elif node.get("pobjectid") and node["pobjectid"].get("processtree"):
2930
- processtree = f"{node['pobjectid']['processtree']}|{tag}"
2931
- elif node.get("pobjectid") and node["pobjectid"].get("tag"):
2932
- processtree = f"{node['pobjectid']['tag']}|{tag}"
2933
- else:
2934
- processtree = tag
2935
- node["objectid"]["processtree"] = processtree
2936
-
2937
- if node["objectid"].get("guid"):
2938
- self.update_objectid(
2939
- guid=node["objectid"]["guid"], treeid=sha256sum, processtree=processtree
2940
- )
2941
-
2942
- for child in children:
2943
- self._create_hashed_node(sha256sum, processtree, child)
2944
-
2945
- def _create_treeids(self, process_tree: List[Dict[str, Any]]) -> None:
2946
- """
2947
- This method creates tree IDs for each node in the process tree
2948
- :param process_tree: A list of dictionaries where each dictionary represents a root.
2949
- :return: None
2950
- """
2951
- for root in process_tree:
2952
- self._create_hashed_node("", "", root)
2953
-
2954
- @staticmethod
2955
- def _remove_safe_leaves_helper(
2956
- node: Dict[str, Any], safe_treeids: List[str]
2957
- ) -> Union[str, None]:
2958
- """
2959
- This method is used to recursively remove safe branches from the given node. It removes a branch from the leaf
2960
- up until it is reaches a node that is not safelisted
2961
- :param node: A dictionary of a process tree node (root)
2962
- :param safe_treeids: All of the safe leaf tree IDs (the safelist)
2963
- :return: Returns the string representing the node's hash for the purpose of recursive removal,
2964
- or returns None if the removal is complete
2965
- """
2966
- children: List[Dict[str, Any]] = node["children"]
2967
- num_removed = 0
2968
- len_of_children = len(children)
2969
- for index in range(len_of_children):
2970
- child_to_operate_on = children[index - num_removed]
2971
- hash_to_remove = OntologyResults._remove_safe_leaves_helper(
2972
- child_to_operate_on, safe_treeids
2973
- )
2974
- if (
2975
- hash_to_remove
2976
- and hash_to_remove == child_to_operate_on["objectid"]["treeid"]
2977
- ):
2978
- children.remove(child_to_operate_on)
2979
- num_removed += 1
2980
- # We need to overwrite the hash of the parent node with the hash to remove to that it will be
2981
- # removed from the tree as well.
2982
- if not children:
2983
- node["objectid"]["treeid"] = hash_to_remove
2984
-
2985
- if not children:
2986
- treeid = node["objectid"]["treeid"]
2987
- if treeid in safe_treeids:
2988
- return treeid
2989
- else:
2990
- return None
2991
-
2992
- @staticmethod
2993
- def _remove_safe_leaves(
2994
- process_tree: List[Dict[str, Any]], safe_treeids: List[str]
2995
- ) -> None:
2996
- """
2997
- This method checks each leaf's hash against the safe tree IDs and removes safe branches from the process tree
2998
- :param process_tree: A list of dictionaries where each dictionary represents a root.
2999
- :param safe_treeids: A list containing the tree IDs of each safe branch
3000
- :return: None
3001
- """
3002
- for root in process_tree[:]:
3003
- _ = OntologyResults._remove_safe_leaves_helper(root, safe_treeids)
3004
- if root["objectid"]["treeid"] in safe_treeids and not root["children"]:
3005
- process_tree.remove(root)
3006
-
3007
@staticmethod
def _filter_event_tree_against_safe_treeids(
    event_tree: List[Dict[str, Any]], safe_treeids: List[str]
) -> List[Dict[str, Any]]:
    """
    Filter the safe branches out of an event tree
    :param event_tree: A list of processes in a tree structure; it is filtered in place
    :param safe_treeids: A List of tree IDs representing safe leaf nodes/branches
    :return: The same list that was passed in, with the safe branches filtered out
    """
    # The pruning happens in place, so the input list doubles as the result
    OntologyResults._remove_safe_leaves(event_tree, safe_treeids)
    filtered_event_tree = event_tree
    return filtered_event_tree
3020
-
3021
- @staticmethod
3022
- def _validate_artifacts(
3023
- artifact_list: List[Dict[str, Any]] = None
3024
- ) -> List[Artifact]:
3025
- """
3026
- This method validates a list of unvalidated artifacts
3027
- :param artifact_list: A list of unvalidated artifacts
3028
- :return: A list of validated artifacts
3029
- """
3030
- if artifact_list is None:
3031
- artifact_list = []
3032
-
3033
- validated_artifacts = []
3034
- for artifact in artifact_list:
3035
- validated_artifact = Artifact(
3036
- name=artifact["name"],
3037
- path=artifact["path"],
3038
- description=artifact["description"],
3039
- to_be_extracted=artifact["to_be_extracted"],
3040
- sha256=artifact["sha256"] if artifact.get("sha256") else get_sha256_for_file(artifact["path"])
3041
- )
3042
- validated_artifacts.append(validated_artifact)
3043
- return validated_artifacts
3044
-
3045
- @staticmethod
3046
- def _handle_artifact(
3047
- artifact: Artifact = None,
3048
- artifacts_result_section: ResultSection = None,
3049
- injection_heur_id: int = 17,
3050
- ) -> None:
3051
- """
3052
- This method handles a single artifact and creates a ResultSection for the artifact, if appropriate
3053
- :param artifact: An artifact object
3054
- :param artifacts_result_section: A master ResultSection that will contain the ResultSection created for the
3055
- given artifact
3056
- :param injection_heur_id: The heuristic ID for the Injection heuristic of a service
3057
- :return: None
3058
- """
3059
- if artifact is None:
3060
- raise Exception("Artifact cannot be None")
3061
-
3062
- artifact_result_section = None
3063
-
3064
- for regex in [HOLLOWSHUNTER_EXE_REGEX, HOLLOWSHUNTER_DLL_REGEX]:
3065
- pattern = compile(regex)
3066
- if pattern.match(artifact.name):
3067
-
3068
- artifact_result_section = next(
3069
- (
3070
- subsection
3071
- for subsection in artifacts_result_section.subsections
3072
- if subsection.title_text == HOLLOWSHUNTER_TITLE
3073
- ),
3074
- None,
3075
- )
3076
-
3077
- if artifact_result_section is None:
3078
- artifact_result_section = ResultSection(HOLLOWSHUNTER_TITLE)
3079
- artifact_result_section.set_heuristic(injection_heur_id)
3080
- artifact_result_section.add_line(
3081
- "HollowsHunter dumped the following:"
3082
- )
3083
-
3084
- artifact_result_section.add_line(f"\t- {artifact.name}")
3085
- artifact_result_section.add_tag(
3086
- "dynamic.process.file_name", artifact.name
3087
- )
3088
- # As of right now, heuristic ID 17 is associated with the Injection category in the Cuckoo service
3089
- if regex in [HOLLOWSHUNTER_EXE_REGEX]:
3090
- artifact_result_section.heuristic.add_signature_id(
3091
- "hollowshunter_exe"
3092
- )
3093
- elif regex in [HOLLOWSHUNTER_DLL_REGEX]:
3094
- artifact_result_section.heuristic.add_signature_id(
3095
- "hollowshunter_dll"
3096
- )
3097
-
3098
- if (
3099
- artifact_result_section is not None
3100
- and artifact_result_section not in artifacts_result_section.subsections
3101
- ):
3102
- artifacts_result_section.add_subsection(artifact_result_section)
3103
-
3104
- def _set_item_times(self, item: Union[Process, ObjectID]) -> None:
3105
- """
3106
- This method sets the item times to values that the ODM can handle
3107
- :param item: An item, either a Process or an ObjectID, whose times will be validated
3108
- :return: None
3109
- """
3110
- if item is None:
3111
- return
3112
- if isinstance(item, Process):
3113
- start_time = next(
3114
- (
3115
- sandbox.analysis_metadata.start_time
3116
- for sandbox in self.sandboxes
3117
- if sandbox.objectid.session == item.objectid.session
3118
- ),
3119
- None,
3120
- )
3121
- end_time = next(
3122
- (
3123
- sandbox.analysis_metadata.end_time
3124
- for sandbox in self.sandboxes
3125
- if sandbox.objectid.session == item.objectid.session
3126
- ),
3127
- None,
3128
- )
3129
- if start_time == MIN_TIME:
3130
- start_time = epoch_to_local(0)
3131
- if item.start_time == MIN_TIME:
3132
- item.set_start_time(start_time)
3133
- if item.end_time == MAX_TIME:
3134
- item.set_end_time(end_time)
3135
- if item.objectid.time_observed == MIN_TIME:
3136
- item.objectid.set_time_observed(start_time)
3137
- if item.objectid.time_observed == MAX_TIME:
3138
- item.objectid.set_time_observed(end_time)
3139
- if item.pobjectid and item.pobjectid.time_observed == MIN_TIME:
3140
- item.pobjectid.set_time_observed(start_time)
3141
- if item.pobjectid and item.pobjectid.time_observed == MAX_TIME:
3142
- item.pobjectid.set_time_observed(end_time)
3143
- elif isinstance(item, ObjectID):
3144
- start_time = next(
3145
- (
3146
- sandbox.analysis_metadata.start_time
3147
- for sandbox in self.sandboxes
3148
- if sandbox.objectid.session == item.session
3149
- ),
3150
- None,
3151
- )
3152
- end_time = next(
3153
- (
3154
- sandbox.analysis_metadata.end_time
3155
- for sandbox in self.sandboxes
3156
- if sandbox.objectid.session == item.session
3157
- ),
3158
- None,
3159
- )
3160
- if start_time == MIN_TIME:
3161
- start_time = epoch_to_local(0)
3162
- if item.time_observed == MIN_TIME:
3163
- item.set_time_observed(start_time)
3164
- elif item.time_observed == MAX_TIME:
3165
- item.set_time_observed(end_time)
3166
- else:
3167
- log.warning(f"Given object {item} is neither Process or ObjectID...")
3168
-
3169
- def _remove_safelisted_processes(
3170
- self, safelist: List[str], need_tree_id: bool = False
3171
- ) -> None:
3172
- """
3173
- This method removes all safelisted processes and all activities associated with those processes
3174
- :param need_tree_id:
3175
- :return: None
3176
- """
3177
- safelisted_processes = [
3178
- process
3179
- for process in self.get_processes()
3180
- if process.objectid.treeid in safelist
3181
- or (need_tree_id and process.objectid.treeid is None)
3182
- ]
3183
-
3184
- safelisted_network_connections = [
3185
- nc
3186
- for nc in self.get_network_connections()
3187
- if nc.process in safelisted_processes
3188
- ]
3189
- safelisted_network_http = [
3190
- nc.http_details for nc in safelisted_network_connections if nc.http_details
3191
- ]
3192
- safelisted_network_dns = [
3193
- nc.dns_details for nc in safelisted_network_connections if nc.dns_details
3194
- ]
3195
- safelisted_signatures = [
3196
- sig
3197
- for sig in self.get_signatures()
3198
- if any(
3199
- all(
3200
- attribute.source == safelisted_process.objectid
3201
- for attribute in sig.attributes
3202
- )
3203
- for safelisted_process in safelisted_processes
3204
- )
3205
- ]
3206
- # TODO Somehow get safelisted subjects
3207
- # safelisted_signatures = [sig for sig in self.get_signatures() if sig.process in safelisted_processes]
3208
- for safelisted_http in safelisted_network_http:
3209
- self._remove_network_http(safelisted_http)
3210
- for safelisted_dns in safelisted_network_dns:
3211
- self._remove_network_dns(safelisted_dns)
3212
- for safelisted_conn in safelisted_network_connections:
3213
- self._remove_network_connection(safelisted_conn)
3214
- for safelisted_signature in safelisted_signatures:
3215
- self._remove_signature(safelisted_signature)
3216
- for safelisted_process in safelisted_processes:
3217
- self._remove_process(safelisted_process)
3218
-
3219
def preprocess_ontology(
    self, safelist: List[str] = None, from_main: bool = False, so_json: str = None
) -> None:
    """
    Preprocess the ontology before it gets validated by Assemblyline's base ODM: safelisted
    processes (and processes without a tree ID) are removed, then the times of what remains are set
    :param safelist: The tree IDs of the processes that are safelisted; defaults to no tree IDs
    :param from_main: A boolean flag that indicates if this method is being run from __main__
                      (not used by this method)
    :param so_json: The path to the json file that represents the Sandbox Ontology
                    (not used by this method)
    :return: None
    """
    safelist = [] if safelist is None else safelist

    self._remove_safelisted_processes(safelist, need_tree_id=True)

    for process in self.get_processes():
        self._set_item_times(process)

    for signature in self.get_signatures():
        for attribute in signature.get_attributes():
            self._set_item_times(attribute.source)

    for network_connection in self.get_network_connections():
        self._set_item_times(network_connection.process)
3242
-
3243
-
3244
def attach_dynamic_ontology(service: ServiceBase, ontres: OntologyResults) -> None:
    """
    Add the ontologies held by an instance of the OntologyResults class to a service instance
    :param service: The service instance that will have ontologies added to it
    :param ontres: The OntologyResults instance that contains the ontologies data
    :return: None
    """
    add_result_part = service.ontology.add_result_part

    # Plain loops: the calls are made for their side effect only, so there is
    # no point in building lists of their return values
    for process in ontres.get_processes():
        add_result_part(ProcessModel, process.as_primitives())
    for sandbox in ontres.get_sandboxes():
        add_result_part(SandboxModel, sandbox.as_primitives())
    for signature in ontres.get_signatures():
        add_result_part(SignatureModel, signature.as_primitives())
    for network_connection in ontres.get_network_connections():
        add_result_part(NetworkConnectionModel, network_connection.as_primitives())
3255
-
3256
-
3257
def convert_sysmon_processes(
    sysmon: List[Dict[str, Any]],
    safelist: Dict[str, Dict[str, List[str]]],
    ontres: OntologyResults,
):
    """
    This method creates the GUID -> Process lookup table
    :param sysmon: A list of processes observed during the analysis of the task by the Sysmon tool
    :param safelist: A dictionary containing matches and regexes for use in safelisting values
    :param ontres: The Ontology Results object instance
    :return: None
    """
    # Every process created here is attached to the session of the most recently added sandbox
    session = ontres.sandboxes[-1].objectid.session
    for event in sysmon:
        event_id = int(event["System"]["EventID"])
        # EventID 10: ProcessAccess causes too many misconfigurations of the process tree
        if event_id == 10:
            continue

        # Values are a mix of strings (paths, GUIDs, timestamps) and ints (pid, ppid)
        process: Dict[str, Any] = {}
        for data in event["EventData"]["Data"]:
            name = data["@Name"].lower()
            text = data.get("#text")
            # A field without a value carries no information, and parsing it below
            # (timestamp slicing, int(), split()) would raise, so it is skipped
            if text is None:
                continue

            if name == "utctime":
                # Drop fractional seconds, LOCAL_FMT is parsed without them
                if "." in text:
                    text = text[:text.index(".")]
                t = str(datetime.strptime(text, LOCAL_FMT))
                if event_id == 1:
                    # Process Create
                    process["start_time"] = t
                elif event_id == 5:
                    # Process Terminate: the real start is unknown, so use the minimum time
                    process["start_time"] = MIN_TIME
                    process["end_time"] = t
                else:
                    process["time_observed"] = t
            elif name in ["sourceprocessguid", "parentprocessguid"]:
                process["pguid"] = text
            elif name in ["processguid", "targetprocessguid"]:
                process["guid"] = text
            elif name in ["parentprocessid", "sourceprocessid"]:
                process["ppid"] = int(text)
            elif name in ["processid", "targetprocessid"]:
                process["pid"] = int(text)
            elif name in ["sourceimage"]:
                process["pimage"] = text
            elif name in ["image", "targetimage"]:
                if not is_tag_safelisted(text, ["dynamic.process.file_name"], safelist):
                    process["image"] = text
            elif name in ["parentcommandline"]:
                if not is_tag_safelisted(text, ["dynamic.process.command_line"], safelist):
                    process["pcommand_line"] = text
            elif name in ["commandline"]:
                if not is_tag_safelisted(text, ["dynamic.process.command_line"], safelist):
                    process["command_line"] = text
            elif name == "originalfilename":
                process["original_file_name"] = text
            elif name == "integritylevel":
                process["integrity_level"] = text
            elif name == "hashes":
                # Only a single "<ALGORITHM>=<value>" pair is understood
                split_hash = text.split("=")
                if len(split_hash) == 2:
                    _, hash_value = split_hash
                    process["image_hash"] = hash_value

        # A usable process needs at least a pid, a (non-safelisted) image and a start time
        if not (process.get("pid") and process.get("image") and process.get("start_time")):
            continue

        # The GUID is optional: without one the process cannot already be known,
        # so it is created rather than raising a KeyError on the lookup
        guid = process.get("guid")
        if guid is not None and ontres.is_guid_in_gpm(guid):
            ontres.update_process(**process)
            continue

        p_oid = ProcessModel.get_oid(
            {
                "pid": process["pid"],
                "ppid": process.get("ppid"),
                "image": process["image"],
                "command_line": process.get("command_line"),
            }
        )
        p = ontres.create_process(
            objectid=ontres.create_objectid(
                tag=Process.create_objectid_tag(process["image"]),
                ontology_id=p_oid,
                guid=guid,
                session=session,
            ),
            **process,
        )
        ontres.add_process(p)
3357
-
3358
-
3359
- def convert_sysmon_network(
3360
- sysmon: List[Dict[str, Any]],
3361
- network: Dict[str, Any],
3362
- safelist: Dict[str, Dict[str, List[str]]],
3363
- convert_timestamp_to_epoch: bool = False,
3364
- ) -> None:
3365
- """
3366
- This method converts network connections observed by Sysmon to the format supported by common sandboxes
3367
- :param sysmon: A list of processes observed during the analysis of the task by the Sysmon tool
3368
- :param network: The JSON of the network section from the report generated by common sandboxes
3369
- :param safelist: A dictionary containing matches and regexes for use in safelisting values
3370
- :param convert_timestamp_to_epoch: A flag indicating if we want timestamps converted to EPOCH
3371
- :return: None
3372
- """
3373
- for event in sysmon:
3374
- event_id = int(event["System"]["EventID"])
3375
-
3376
- # There are two main EventIDs that describe network events: 3 (Network connection) and 22 (DNS query)
3377
- if event_id == 3:
3378
- protocol = None
3379
- network_conn = {
3380
- "src": None,
3381
- "dst": None,
3382
- "time": None,
3383
- "dport": None,
3384
- "sport": None,
3385
- "guid": None,
3386
- "pid": None,
3387
- "image": None,
3388
- }
3389
- for data in event["EventData"]["Data"]:
3390
- name = data["@Name"]
3391
- text = data.get("#text")
3392
- if name == "UtcTime":
3393
- if convert_timestamp_to_epoch:
3394
- network_conn["time"] = datetime.strptime(text, "%Y-%m-%d %H:%M:%S.%f").timestamp()
3395
- else:
3396
- if "." in text:
3397
- text = text[:text.index(".")]
3398
- network_conn["time"] = str(datetime.strptime(text, LOCAL_FMT))
3399
- elif name == "ProcessGuid":
3400
- network_conn["guid"] = text
3401
- elif name == "ProcessId":
3402
- network_conn["pid"] = int(text)
3403
- elif name == "Image":
3404
- network_conn["image"] = text
3405
- elif name == "Protocol":
3406
- protocol = text.lower()
3407
- elif name == "SourceIp":
3408
- if re_match(IPV4_REGEX, text):
3409
- network_conn["src"] = text
3410
- elif name == "SourcePort":
3411
- network_conn["sport"] = int(text)
3412
- elif name == "DestinationIp":
3413
- if re_match(IPV4_REGEX, text):
3414
- network_conn["dst"] = text
3415
- elif name == "DestinationPort":
3416
- network_conn["dport"] = int(text)
3417
- if (
3418
- any(network_conn[key] is None for key in network_conn.keys())
3419
- or not protocol
3420
- ):
3421
- continue
3422
- elif any(
3423
- req["dst"] == network_conn["dst"]
3424
- and req["dport"] == network_conn["dport"]
3425
- and req["src"] == network_conn["src"]
3426
- and req["sport"] == network_conn["sport"]
3427
- for req in network[protocol]
3428
- ):
3429
- # Replace record since we have more info from Sysmon
3430
- for req in network[protocol][:]:
3431
- if (
3432
- req["dst"] == network_conn["dst"]
3433
- and req["dport"] == network_conn["dport"]
3434
- and req["src"] == network_conn["src"]
3435
- and req["sport"] == network_conn["sport"]
3436
- ):
3437
- network[protocol].remove(req)
3438
- network[protocol].append(network_conn)
3439
- else:
3440
- network[protocol].append(network_conn)
3441
- elif event_id == 22:
3442
- dns_query = {
3443
- "type": "A",
3444
- "request": None,
3445
- "answers": [],
3446
- "time": None,
3447
- "guid": None,
3448
- "pid": None,
3449
- "image": None,
3450
- }
3451
- for data in event["EventData"]["Data"]:
3452
- name = data["@Name"]
3453
- text = data.get("#text")
3454
- if text is None:
3455
- continue
3456
- if name == "UtcTime":
3457
- if convert_timestamp_to_epoch:
3458
- dns_query["time"] = datetime.strptime(text, "%Y-%m-%d %H:%M:%S.%f").timestamp()
3459
- else:
3460
- if "." in text:
3461
- text = text[:text.index(".")]
3462
- dns_query["time"] = str(datetime.strptime(text, LOCAL_FMT))
3463
- elif name == "ProcessGuid":
3464
- dns_query["guid"] = text
3465
- elif name == "ProcessId":
3466
- dns_query["pid"] = int(text)
3467
- elif name == "QueryName":
3468
- if not is_tag_safelisted(
3469
- text, ["network.dynamic.domain"], safelist
3470
- ):
3471
- dns_query["request"] = text
3472
- elif name == "QueryResults":
3473
- ip = findall(IPV4_REGEX, text)
3474
- for item in ip:
3475
- dns_query["answers"].append({"data": item, "type": "A"})
3476
- elif name == "Image":
3477
- dns_query["image"] = text
3478
- if any(dns_query[key] is None for key in dns_query.keys()):
3479
- continue
3480
- elif any(
3481
- query["request"] == dns_query["request"]
3482
- for query in network.get("dns", [])
3483
- ):
3484
- # Replace record since we have more info from Sysmon
3485
- for query in network["dns"][:]:
3486
- if query["request"] == dns_query["request"]:
3487
- network["dns"].remove(query)
3488
- network["dns"].append(dns_query)
3489
- else:
3490
- if "dns" not in network:
3491
- network["dns"] = []
3492
- network["dns"].append(dns_query)
3493
-
3494
-
3495
def _add_ioc_row(result_section: ResultTableSection, ioc_type: str, ioc: str) -> None:
    """
    This method adds an IOC row to a table section unless an identical row is already in its body
    :param result_section: The result section that the row will be added to
    :param ioc_type: The kind of IOC, e.g. "ip", "domain", "uri" or "uri_path"
    :param ioc: The IOC value
    :return: None
    """
    body = result_section.section_body.body
    # The row is looked up by its JSON representation inside the section body
    if not body or dumps({"ioc_type": ioc_type, "ioc": ioc}) not in body:
        result_section.add_row(TableRow(ioc_type=ioc_type, ioc=ioc))


def extract_iocs_from_text_blob(
    blob: str,
    result_section: ResultTableSection,
    so_sig: Optional[Signature] = None,
    source: Optional[ObjectID] = None,
    enforce_char_min: bool = False,
    enforce_domain_char_max: bool = False,
    safelist: Optional[Dict[str, Dict[str, List[str]]]] = None,
    is_network_static: bool = False
) -> None:
    """
    This method searches for domains, IPs and URIs used in blobs of text and tags them
    :param blob: The blob of text that we will be searching through
    :param result_section: The result section that that tags will be added to
    :param so_sig: The signature for the Ontology Results
    :param source: The source of the signature for the Ontology Results
    :param enforce_char_min: Enforce the minimum amount of characters that an ioc can have
    :param enforce_domain_char_max: Enforce the maximum amount of characters that a domain can have
    :param safelist: The safelist containing matches and regexs. The product of a
                     service using self.get_api_interface().get_safelist().
    :param is_network_static: Should we tag these IOCs as static or dynamic? Default to dynamic since this method
                              is in the dynamic service helper module.
    :return: None
    """
    if not blob:
        return

    network_tag_type = "static" if is_network_static else "dynamic"
    invalid_uri_chars = ['"', "'", '<', '>', "(", ")"]

    # All matching is done on the lowercased text
    blob = blob.lower()
    ips = set(findall(IP_REGEX, blob))
    # There is overlap here between regular expressions, so we want to isolate domains that are not ips
    domains = set(findall(DOMAIN_REGEX, blob)) - ips
    # There is overlap here between regular expressions, so we want to isolate uris that are not domains
    # TODO: Are we missing IOCs to the point where we need a different regex?
    uris = set(findall(URL_REGEX, blob)) - domains - ips

    for ip in sorted(ips):
        if add_tag(result_section, f"network.{network_tag_type}.ip", ip, safelist):
            _add_ioc_row(result_section, "ip", ip)

    for domain in sorted(domains):
        if enforce_char_min and len(domain) < MIN_DOMAIN_CHARS:
            continue
        if enforce_domain_char_max and len(domain) > MAX_DOMAIN_CHARS:
            continue

        # Check if the domain ends with a TLD that is frequently a false positive
        if any(domain.lower().endswith(tld) for tld in COMMON_FP_TLDS):
            # If it does, only keep the domain when it is the host of one of the URIs that were found
            if not any(domain == urlparse(uri.lower()).hostname for uri in uris):
                continue
        elif domain.lower() in COMMON_FP_DOMAINS:
            continue

        if add_tag(result_section, f"network.{network_tag_type}.domain", domain, safelist):
            _add_ioc_row(result_section, "domain", domain)

    for uri in sorted(uris):
        if enforce_char_min and len(uri) < MIN_URI_CHARS:
            continue

        # The regex can capture surrounding quotes/brackets; for each such character, keep the
        # first piece of the URI that is itself a full URI
        if any(invalid_uri_char in uri for invalid_uri_char in invalid_uri_chars):
            for invalid_uri_char in invalid_uri_chars:
                for u in uri.split(invalid_uri_char):
                    if re_match(FULL_URI, u):
                        uri = u
                        break

        # If there is an common protocol in the URI, and there are some nonsense characters included, exclude them!
        if ":" in uri:
            scheme, location = uri.split(":", 1)
            if scheme not in COMMON_SCHEMES:
                for common_scheme in COMMON_SCHEMES:
                    if scheme.endswith(common_scheme):
                        uri = f"{common_scheme}:{location}"
                        break

        # If the tag was safelisted or invalid, don't try to tag the uri_path
        if not add_tag(result_section, f"network.{network_tag_type}.uri", uri, safelist):
            continue

        _add_ioc_row(result_section, "uri", uri)
        if so_sig and source:
            so_sig.add_attribute(so_sig.create_attribute(source=source, uri=uri))

        # Paths are searched for in what follows the scheme separator
        if "//" in uri:
            uri = uri.split("//")[1]
        for uri_path in findall(URI_PATH, uri):
            if enforce_char_min and len(uri_path) < MIN_URI_PATH_CHARS:
                continue
            if add_tag(result_section, f"network.{network_tag_type}.uri_path", uri_path, safelist):
                _add_ioc_row(result_section, "uri_path", uri_path)
3622
-
3623
-
3624
# DEBUGGING METHOD
if __name__ == "__main__":
    # Manual check of the OntologyResults class -> Sandbox class output.
    # The first command-line argument is the path to the ontology JSON to load.
    import sys

    so_json_path = sys.argv[1]
    default_so = OntologyResults()
    default_so.preprocess_ontology(so_json=so_json_path, from_main=True, safelist=[])