assemblyline-v4-service 4.4.0.24__py3-none-any.whl → 4.4.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

Files changed (42) hide show
  1. assemblyline_v4_service/VERSION +1 -1
  2. assemblyline_v4_service/common/api.py +3 -2
  3. assemblyline_v4_service/common/base.py +3 -4
  4. assemblyline_v4_service/common/helper.py +1 -2
  5. assemblyline_v4_service/common/{extractor/ocr.py → ocr.py} +0 -1
  6. assemblyline_v4_service/common/ontology_helper.py +7 -8
  7. assemblyline_v4_service/common/request.py +4 -5
  8. assemblyline_v4_service/common/result.py +3 -3
  9. assemblyline_v4_service/common/task.py +3 -3
  10. assemblyline_v4_service/common/utils.py +2 -2
  11. assemblyline_v4_service/updater/helper.py +4 -0
  12. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/METADATA +1 -1
  13. assemblyline_v4_service-4.4.0.26.dist-info/RECORD +28 -0
  14. assemblyline_v4_service/common/balbuzard/__init__.py +0 -0
  15. assemblyline_v4_service/common/balbuzard/balbuzard.py +0 -656
  16. assemblyline_v4_service/common/balbuzard/bbcrack.py +0 -830
  17. assemblyline_v4_service/common/balbuzard/patterns.py +0 -650
  18. assemblyline_v4_service/common/dynamic_service_helper.py +0 -3631
  19. assemblyline_v4_service/common/extractor/__init__.py +0 -1
  20. assemblyline_v4_service/common/extractor/base64.py +0 -86
  21. assemblyline_v4_service/common/extractor/pe_file.py +0 -51
  22. assemblyline_v4_service/common/icap.py +0 -149
  23. assemblyline_v4_service/common/keytool_parse.py +0 -66
  24. assemblyline_v4_service/common/pestudio/__init__.py +0 -0
  25. assemblyline_v4_service/common/pestudio/xml/__init__.py +0 -0
  26. assemblyline_v4_service/common/pestudio/xml/features.xml +0 -5607
  27. assemblyline_v4_service/common/pestudio/xml/functions.xml +0 -5824
  28. assemblyline_v4_service/common/pestudio/xml/languages.xml +0 -375
  29. assemblyline_v4_service/common/pestudio/xml/resources.xml +0 -511
  30. assemblyline_v4_service/common/pestudio/xml/signatures.xml +0 -29105
  31. assemblyline_v4_service/common/pestudio/xml/strings.xml +0 -2379
  32. assemblyline_v4_service/common/safelist_helper.py +0 -73
  33. assemblyline_v4_service/common/section_reducer.py +0 -43
  34. assemblyline_v4_service/common/tag_helper.py +0 -117
  35. assemblyline_v4_service/common/tag_reducer.py +0 -242
  36. assemblyline_v4_service/testing/__init__.py +0 -0
  37. assemblyline_v4_service/testing/helper.py +0 -463
  38. assemblyline_v4_service/testing/regenerate_results.py +0 -37
  39. assemblyline_v4_service-4.4.0.24.dist-info/RECORD +0 -53
  40. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/LICENCE.md +0 -0
  41. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/WHEEL +0 -0
  42. {assemblyline_v4_service-4.4.0.24.dist-info → assemblyline_v4_service-4.4.0.26.dist-info}/top_level.txt +0 -0
@@ -1,73 +0,0 @@
1
- from re import compile, IGNORECASE, match, search
2
- from typing import Dict, List
3
- from urllib.parse import urlparse
4
-
5
- from assemblyline.odm.base import DOMAIN_REGEX, IP_REGEX
6
-
7
- URL_REGEX = compile(
8
- r"(?:(?:(?:[A-Za-z]*:)?//)?(?:\S+(?::\S*)?@)?(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
9
- r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:(?:[A-Za-z0-9\u00a1-\uffff][A-Za-z0-9\u00a1-\uffff_-]{0,62})"
10
- r"?[A-Za-z0-9\u00a1-\uffff]\.)+(?:xn--)?(?:[A-Za-z0-9\u00a1-\uffff]{2,}\.?))(?::\d{2,5})?)(?:[/?#][^\s,\\\\]*)?")
11
-
12
-
13
- def is_tag_safelisted(
14
- value: str, tags: List[str],
15
- safelist: Dict[str, Dict[str, List[str]]],
16
- substring: bool = False) -> bool:
17
- """
18
- This method determines if a given value has any safelisted components.
19
- :param value: The value to be checked if it has been safelisted
20
- :param tags: The tags which will be used for grabbing specific values from the safelist
21
- :param safelist: The safelist containing matches and regexes. The
22
- product of a service using self.get_api_interface().get_safelist().
23
- :param substring: A flag that indicates if we should check if the value is contained within the match
24
- :return: A boolean indicating if the value has been safelisted
25
- """
26
- if not value or not tags or not safelist:
27
- return False
28
-
29
- if not any(key in safelist for key in ["match", "regex"]):
30
- return False
31
-
32
- safelist_matches = safelist.get("match", {})
33
- safelist_regexes = safelist.get("regex", {})
34
-
35
- for tag in tags:
36
- if tag in safelist_matches:
37
- for safelist_match in safelist_matches[tag]:
38
- if value.lower() == safelist_match.lower():
39
- return True
40
- elif substring and safelist_match.lower() in value.lower():
41
- return True
42
-
43
- if tag in safelist_regexes:
44
- for safelist_regex in safelist_regexes[tag]:
45
- if match(safelist_regex, value, IGNORECASE):
46
- return True
47
-
48
- return False
49
-
50
-
51
- def contains_safelisted_value(val: str, safelist: Dict[str, Dict[str, List[str]]]) -> bool:
52
- """
53
- This method checks if a given value is part of a safelist
54
- :param val: The given value
55
- :param safelist: A dictionary containing matches and regexes for use in safelisting values
56
- :return: A boolean representing if the given value is part of a safelist
57
- """
58
- if not val or not isinstance(val, str):
59
- return False
60
- ip = search(IP_REGEX, val)
61
- url = search(URL_REGEX, val)
62
- domain = search(DOMAIN_REGEX, val)
63
- if ip is not None:
64
- ip = ip.group()
65
- return is_tag_safelisted(ip, ["network.dynamic.ip"], safelist)
66
- elif domain is not None:
67
- domain = domain.group()
68
- return is_tag_safelisted(domain, ["network.dynamic.domain"], safelist)
69
- elif url is not None:
70
- url_pieces = urlparse(url.group())
71
- domain = url_pieces.netloc
72
- return is_tag_safelisted(domain, ["network.dynamic.domain"], safelist)
73
- return False
@@ -1,43 +0,0 @@
1
- from assemblyline_v4_service.common.result import Result, ResultSection
2
- from assemblyline_v4_service.common.tag_reducer import REDUCE_MAP
3
-
4
-
5
- def reduce(al_result: Result) -> Result:
6
- """
7
- This function goes through a result section recursively and tries to reduce the amount of
8
- produced tags based on a reducer set for each specific tag
9
-
10
- :param al_result: An Assemblyline result object
11
- :return: Reduced Assemblyline result object
12
- """
13
- for section in al_result.sections:
14
- _section_traverser(section)
15
- return al_result
16
-
17
-
18
- def _section_traverser(section: ResultSection = None) -> ResultSection:
19
- """
20
- This function goes through each section and sends the tags to a function
21
- that will reduce specific tags
22
-
23
- :param section: An Assemblyline result section
24
- :return: Reduced Assemblyline result section
25
- """
26
- for subsection in section.subsections:
27
- _section_traverser(subsection)
28
- if section.tags:
29
- section.set_tags(_reduce_specific_tags(section.tags))
30
- return section
31
-
32
-
33
- def _reduce_specific_tags(tags=None) -> dict:
34
- """
35
- This function is very much a work in progress. Currently the only tags that we
36
- feel the need to reduce are unique uris and uri paths
37
- :param tags: Dictionary of tag types and their values
38
- :return: Dictionary of tag types and their reduced values
39
- """
40
- if tags is None:
41
- tags = {}
42
-
43
- return {tag_type: REDUCE_MAP.get(tag_type, lambda x: x)(tag_values) for tag_type, tag_values in tags.items()}
@@ -1,117 +0,0 @@
1
- from re import match, search
2
- from typing import Any, Dict, List, Optional, Union
3
-
4
- from assemblyline.common.net import is_valid_domain, is_valid_ip
5
- from assemblyline.common.str_utils import safe_str
6
- from assemblyline.odm.base import DOMAIN_ONLY_REGEX, DOMAIN_REGEX, FULL_URI, IP_REGEX, URI_PATH
7
- from assemblyline_v4_service.common.result import ResultSection
8
- from assemblyline_v4_service.common.safelist_helper import is_tag_safelisted
9
-
10
-
11
- def add_tag(
12
- result_section: ResultSection,
13
- tag: str, value: Union[Any, List[Any]],
14
- safelist: Dict[str, Dict[str, List[str]]] = None
15
- ) -> bool:
16
- """
17
- This method adds the value(s) as a tag to the ResultSection. Can take a list of values or a single value.
18
- :param result_section: The ResultSection that the tag will be added to
19
- :param tag: The tag type that the value will be tagged under
20
- :param value: The value, a single item or a list, that will be tagged under the tag type
21
- :param safelist: The safelist containing matches and regexes. The product of a
22
- service using self.get_api_interface().get_safelist().
23
- :return: Tag was successfully added
24
- """
25
- if safelist is None:
26
- safelist = {}
27
-
28
- tags_were_added = False
29
- if not value:
30
- return tags_were_added
31
-
32
- if type(value) == list:
33
- for item in value:
34
- # If one tag is added, then return True
35
- tags_were_added = _validate_tag(result_section, tag, item, safelist) or tags_were_added
36
- else:
37
- tags_were_added = _validate_tag(result_section, tag, value, safelist)
38
- return tags_were_added
39
-
40
-
41
- def _get_regex_for_tag(tag: str) -> str:
42
- """
43
- This method returns a regular expression used for validating a certain tag type
44
- :param tag: The type of tag
45
- :return: The relevant regular expression
46
- """
47
- reg_to_match: Optional[str] = None
48
- if "domain" in tag:
49
- reg_to_match = DOMAIN_ONLY_REGEX
50
- elif "uri_path" in tag:
51
- reg_to_match = URI_PATH
52
- elif "uri" in tag:
53
- reg_to_match = FULL_URI
54
- elif "ip" in tag:
55
- reg_to_match = IP_REGEX
56
- return reg_to_match
57
-
58
-
59
- def _validate_tag(
60
- result_section: ResultSection,
61
- tag: str,
62
- value: Any,
63
- safelist: Dict[str, Dict[str, List[str]]] = None
64
- ) -> bool:
65
- """
66
- This method validates the value relative to the tag type before adding the value as a tag to the ResultSection.
67
- :param result_section: The ResultSection that the tag will be added to
68
- :param tag: The tag type that the value will be tagged under
69
- :param value: The item that will be tagged under the tag type
70
- :param safelist: The safelist containing matches and regexes. The product of a
71
- service using self.get_api_interface().get_safelist().
72
- :return: Tag was successfully added
73
- """
74
- if safelist is None:
75
- safelist = {}
76
-
77
- if tag.startswith("network.static."):
78
- network_tag_type = "static"
79
- else:
80
- network_tag_type = "dynamic"
81
-
82
- regex = _get_regex_for_tag(tag)
83
- if regex and not match(regex, value):
84
- return False
85
-
86
- if "ip" in tag and not is_valid_ip(value):
87
- return False
88
-
89
- if "domain" in tag and not is_valid_domain(value):
90
- return False
91
-
92
- if is_tag_safelisted(value, [tag], safelist):
93
- return False
94
-
95
- # if "uri" is in the tag, let's try to extract its domain/ip and tag it.
96
- if "uri_path" not in tag and "uri" in tag:
97
- # First try to get the domain
98
- valid_domain = False
99
- domain = search(DOMAIN_REGEX, value)
100
- if domain:
101
- domain = domain.group()
102
- valid_domain = _validate_tag(result_section, f"network.{network_tag_type}.domain", domain, safelist)
103
- # Then try to get the IP
104
- valid_ip = False
105
- ip = search(IP_REGEX, value)
106
- if ip:
107
- ip = ip.group()
108
- valid_ip = _validate_tag(result_section, f"network.{network_tag_type}.ip", ip, safelist)
109
-
110
- if value not in [domain, ip] and (valid_domain or valid_ip):
111
- result_section.add_tag(tag, safe_str(value))
112
- else:
113
- return False
114
- else:
115
- result_section.add_tag(tag, safe_str(value))
116
-
117
- return True
@@ -1,242 +0,0 @@
1
- import regex as re
2
- import os.path
3
-
4
- from copy import deepcopy
5
- from typing import List
6
- from urllib.parse import urlparse, parse_qs, urlunparse, urlencode, unquote
7
-
8
- NUMBER_REGEX = re.compile("[0-9]*")
9
- ALPHA_REGEX = re.compile("[a-zA-Z]*")
10
- ALPHANUM_REGEX = re.compile("[a-zA-Z0-9]*")
11
- BASE64_REGEX = re.compile("(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?")
12
- DO_NOT_REDUCE = ["netloc", "hostname"]
13
-
14
-
15
- def reduce_uri_tags(uris=None) -> List[str]:
16
- """
17
- The purpose of this helper function is to reduce the amount of unique uris to be tagged.
18
- ex. If a sample makes a hundred network calls to four unique domains, with only one parameter
19
- changing in the HTTP request each time, this should be synthesized to four uris to
20
- be tagged, but with a placeholder for the parameter(s) that changes in each callout.
21
- """
22
- if uris is None:
23
- uris = []
24
-
25
- parsed_uris = []
26
- reduced_uris = set()
27
- for uri in uris:
28
- parsed_uri = urlparse(uri)
29
- # Match items we care about into a nice dictionary
30
- uri_dict = {
31
- "scheme": parsed_uri.scheme, # scheme param
32
- "netloc": parsed_uri.netloc, # ""
33
- "path": parsed_uri.path, # ""
34
- "params": parsed_uri.params, # ""
35
- "query": parsed_uri.query, # ""
36
- "fragment": parsed_uri.fragment, # ""
37
- "username": parsed_uri.username, # None
38
- "password": parsed_uri.password, # None
39
- "hostname": parsed_uri.hostname, # None
40
- "port": parsed_uri.port # None
41
- }
42
-
43
- # We need to parse a couple of the returned params from urlparse more in-depth
44
- if uri_dict["query"] != "":
45
- # note that values of keys in dict will be in lists of length 1, which we don't want
46
- uri_dict["query"] = parse_qs(uri_dict["query"])
47
- if uri_dict["path"] != "":
48
- # converting tuple to list
49
- uri_dict["path"] = list(os.path.split(uri_dict["path"]))
50
- # removing lone slashes
51
- uri_dict["path"] = [not_slash for not_slash in uri_dict["path"] if not_slash != "/"]
52
-
53
- parsed_uris.append(uri_dict)
54
-
55
- # iterate through, comparing two parsed uris. if the percentage of similarity
56
- # is greater than x, then they are sufficiently similar and can have parts
57
- # replaced.
58
-
59
- # time for the smarts
60
- comparison_uris = deepcopy(parsed_uris)
61
- for parsed_uri in parsed_uris:
62
- # this flag will be used to check if this uri matches any other uri ever
63
- totally_unique = True
64
- for comparison_uri in comparison_uris:
65
- if parsed_uri == comparison_uri:
66
- continue
67
- equal_keys = 0
68
- total_list_len = 0
69
- total_dict_len = 0
70
- difference = {}
71
- # now go through each key, and check for equality
72
- for key in parsed_uri.keys():
73
- val = parsed_uri[key]
74
- comp_val = comparison_uri[key]
75
-
76
- # if equal, add to count of similar keys
77
- if type(val) == list:
78
- val_len = len(val)
79
- if val == comp_val:
80
- equal_keys += val_len
81
- else:
82
- difference[key] = dict()
83
- comp_len = len(comp_val)
84
- max_list_len = max(val_len, comp_len)
85
- for item in range(max_list_len):
86
- if item >= comp_len or item >= val_len:
87
- # bail!
88
- break
89
- if val[item] == comp_val[item]:
90
- equal_keys += 1
91
- else:
92
- difference[key][item] = []
93
- difference[key][item].append(val[item])
94
- difference[key][item].append(comp_val[item])
95
- total_list_len += val_len
96
-
97
- elif type(val) == dict:
98
- val_len = len(val)
99
- if val == comp_val:
100
- equal_keys += val_len
101
- else:
102
- difference[key] = dict()
103
- if comp_val != "":
104
- comp_keys = list(comp_val.keys())
105
- val_keys = list(val.keys())
106
- all_keys = set(comp_keys + val_keys)
107
- val_len = len(all_keys)
108
-
109
- for item in all_keys:
110
- if val.get(item) and comp_val.get(item) and val[item] == comp_val[item]:
111
- equal_keys += 1
112
- else:
113
- difference[key][item] = []
114
- if val.get(item):
115
- difference[key][item].append(val[item])
116
- if comp_val.get(item):
117
- difference[key][item].append(comp_val[item])
118
- total_dict_len += val_len
119
- else: # Not dict or a list
120
- if val == comp_val:
121
- equal_keys += 1
122
- else:
123
- difference[key] = []
124
- difference[key].append(val)
125
- difference[key].append(comp_val)
126
- # now find percentage similar
127
- if total_dict_len > 1 and total_list_len > 1:
128
- percentage_equal = equal_keys / (len(parsed_uri.keys()) - 2 + total_list_len + total_dict_len)
129
- elif total_dict_len > 1 or total_list_len > 1:
130
- percentage_equal = equal_keys / (len(parsed_uri.keys()) - 1 + total_list_len + total_dict_len)
131
- else:
132
- percentage_equal = equal_keys / (len(parsed_uri.keys()) + total_list_len + total_dict_len)
133
-
134
- # if percentage equal is > some value (say 90), then we can say that
135
- # urls are similar enough to reduce
136
- if percentage_equal >= 0.80:
137
- # So that we don't overwrite details
138
- comparison_uri_copy = deepcopy(comparison_uri)
139
- # somehow recognize where parameters are that match and replace them.
140
- for item in difference.keys():
141
- # We don't want to replace the following:
142
- if item in DO_NOT_REDUCE:
143
- continue
144
-
145
- val = difference[item]
146
- if item == "query":
147
- for key in val.keys():
148
- placeholders = []
149
- # since each of these items is a list of lists
150
- for val_item in val[key]:
151
- # use regex to determine the parameter type
152
- value = val_item[0]
153
- placeholder = _get_placeholder(value)
154
- placeholders.append(placeholder)
155
- if len(set(placeholders)) == 1:
156
- # the same placeholder type is consistent with all values
157
- # update the url_dict value
158
- comparison_uri_copy[item][key] = list(set(placeholders))
159
- else:
160
- # the placeholder types vary
161
- comparison_uri_copy[item][key] = ",".join(placeholders)
162
- elif item == "path":
163
- placeholders = {}
164
- for key in val.keys():
165
- placeholders[key] = []
166
- for list_item in val[key]:
167
- # if / exists, pop the rest out
168
- if list_item != "/" and list_item[0] == "/":
169
- # use regex to determine the parameter type
170
- placeholder = _get_placeholder(list_item[1:])
171
- placeholders[key].append("/"+placeholder)
172
- else:
173
- placeholder = _get_placeholder(list_item)
174
- placeholders[key].append(placeholder)
175
- for key in placeholders.keys():
176
- if len(set(placeholders[key])) == 1:
177
- # the same placeholder type is consistent with all values
178
- # update the comparison_uri_copy value
179
- comparison_uri_copy[item][key] = list(set(placeholders[key]))[0]
180
- else:
181
- # the placeholder types vary
182
- comparison_uri_copy[item][key] = ",".join(set(placeholders[key]))
183
- else:
184
- comparison_uri_copy[item] = _get_placeholder(val)
185
-
186
- # now it's time to rejoin the parts of the url
187
- reduced_uris.add(_turn_back_into_uri(comparison_uri_copy))
188
- totally_unique = False
189
-
190
- # Congratulations, you are one in a million
191
- if totally_unique:
192
- reduced_uris.add(_turn_back_into_uri(parsed_uri))
193
- reduced_uris_list = list(reduced_uris)
194
- # recursive_list = reduce_uri_tags(reduced_uris_list)
195
- # if len(recursive_list) < len(reduced_uris_list):
196
- # return reduced_uris_list
197
- # elif
198
- # if reduce_uri_tags(reduced_uris_list))
199
- return reduced_uris_list
200
-
201
-
202
- def _turn_back_into_uri(uri_parts: dict) -> str:
203
- # turn the path back into a string
204
- uri_parts["path"] = '/'.join(uri_parts["path"])
205
- # turn the query back into a query string
206
- # first, remove the list wrappers
207
- if uri_parts["query"] != "":
208
- for item in uri_parts["query"].keys():
209
- uri_parts["query"][item] = uri_parts["query"][item][0]
210
- uri_parts["query"] = unquote(urlencode(uri_parts["query"]))
211
-
212
- uri_tuple = (uri_parts["scheme"], uri_parts["netloc"],
213
- uri_parts["path"], uri_parts["params"],
214
- uri_parts["query"], uri_parts["fragment"])
215
- real_url = urlunparse(uri_tuple)
216
- return real_url
217
-
218
-
219
- def _get_placeholder(val: str) -> str:
220
- if not val:
221
- return "${UNKNOWN_TYPE}"
222
-
223
- if NUMBER_REGEX.fullmatch(val):
224
- placeholder = "${NUMBER}"
225
- elif ALPHA_REGEX.fullmatch(val):
226
- placeholder = "${ALPHA}"
227
- # Note that BASE64 Regex must happen before ALPHANUM regex or else ALPHANUM will hit on BASE64
228
- elif BASE64_REGEX.fullmatch(val):
229
- placeholder = "${BASE64}"
230
- elif ALPHANUM_REGEX.fullmatch(val):
231
- placeholder = "${ALPHA_NUM}"
232
- else:
233
- placeholder = "${UNKNOWN_TYPE}"
234
- return placeholder
235
-
236
-
237
- REDUCE_MAP = {
238
- "network.dynamic.uri": reduce_uri_tags,
239
- "network.static.uri": reduce_uri_tags,
240
- "network.dynamic.uri_path": reduce_uri_tags,
241
- "network.static.uri_path": reduce_uri_tags
242
- }
File without changes