atomicshop 2.14.12__py3-none-any.whl → 2.14.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/config_init.py +1 -1
- atomicshop/filesystem.py +14 -3
- atomicshop/mitm/import_config.py +3 -3
- atomicshop/mitm/statistic_analyzer.py +79 -478
- atomicshop/mitm/statistic_analyzer_helper/__init__.py +0 -0
- atomicshop/mitm/statistic_analyzer_helper/analyzer_helper.py +136 -0
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +330 -0
- atomicshop/question_answer_engine.py +2 -2
- atomicshop/wrappers/elasticsearchw/infrastructure.py +1 -1
- atomicshop/wrappers/loggingw/reading.py +2 -3
- atomicshop/wrappers/socketw/socket_client.py +1 -1
- {atomicshop-2.14.12.dist-info → atomicshop-2.14.14.dist-info}/METADATA +1 -1
- {atomicshop-2.14.12.dist-info → atomicshop-2.14.14.dist-info}/RECORD +17 -14
- {atomicshop-2.14.12.dist-info → atomicshop-2.14.14.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.14.12.dist-info → atomicshop-2.14.14.dist-info}/WHEEL +0 -0
- {atomicshop-2.14.12.dist-info → atomicshop-2.14.14.dist-info}/top_level.txt +0 -0
|
@@ -1,146 +1,17 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import datetime
|
|
2
|
-
import statistics
|
|
3
3
|
import json
|
|
4
|
-
from typing import Literal, Union
|
|
4
|
+
from typing import Union
|
|
5
5
|
|
|
6
|
+
from .statistic_analyzer_helper import analyzer_helper, moving_average_helper
|
|
6
7
|
from .. import filesystem, domains, datetimes, urls
|
|
7
8
|
from ..basics import dicts
|
|
8
|
-
from ..file_io import tomls, xlsxs, csvs, jsons
|
|
9
|
-
from ..wrappers.loggingw import reading, consts
|
|
9
|
+
from ..file_io import tomls, xlsxs, jsons
|
|
10
|
+
from ..wrappers.loggingw import reading
|
|
10
11
|
from ..print_api import print_api
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
"""
|
|
15
|
-
This function gets the last day number from the statistics content.
|
|
16
|
-
|
|
17
|
-
:param statistics_content: list, of lines in the statistics content.
|
|
18
|
-
:param stop_after_lines: integer, if specified, the function will stop after the specified number of lines.
|
|
19
|
-
:return: integer, the last day number.
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
last_day_number = None
|
|
23
|
-
start_time_temp = None
|
|
24
|
-
for line_index, line in enumerate(statistics_content):
|
|
25
|
-
try:
|
|
26
|
-
request_time = datetime.datetime.strptime(line['request_time_sent'], '%Y-%m-%d %H:%M:%S.%f')
|
|
27
|
-
except ValueError:
|
|
28
|
-
continue
|
|
29
|
-
|
|
30
|
-
if not start_time_temp:
|
|
31
|
-
start_time_temp = request_time
|
|
32
|
-
|
|
33
|
-
if stop_after_lines:
|
|
34
|
-
if line_index == stop_after_lines:
|
|
35
|
-
break
|
|
36
|
-
|
|
37
|
-
last_day_number = datetimes.get_difference_between_dates_in_days(start_time_temp, request_time)
|
|
38
|
-
return last_day_number
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def create_empty_features_dict() -> dict:
|
|
42
|
-
"""
|
|
43
|
-
This function creates an empty dictionary for the daily stats. This should be initiated for each 'host_type' of:
|
|
44
|
-
'domain', 'subdomain', 'url_no_parameters'.
|
|
45
|
-
:return: dict
|
|
46
|
-
"""
|
|
47
|
-
|
|
48
|
-
return {
|
|
49
|
-
'total_count': {}, 'normal_count': {}, 'error_count': {},
|
|
50
|
-
'request_0_byte_count': {}, 'response_0_byte_count': {},
|
|
51
|
-
'request_sizes_list': {}, 'response_sizes_list': {},
|
|
52
|
-
'request_sizes_no_0_bytes_list': {}, 'response_sizes_no_0_bytes_list': {},
|
|
53
|
-
'average_request_size': {}, 'average_response_size': {},
|
|
54
|
-
'average_request_size_no_0_bytes': {}, 'average_response_size_no_0_bytes': {}}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def add_to_count_to_daily_stats(
|
|
58
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str) -> None:
|
|
59
|
-
"""
|
|
60
|
-
This function adds 1 to the 'count' feature of the current day in the daily stats.
|
|
61
|
-
|
|
62
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
63
|
-
:param current_day: integer, the current day number.
|
|
64
|
-
:param last_day: integer, the last day number.
|
|
65
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
66
|
-
:param feature: string, the feature to add the count to. Can be: 'total_count', 'normal_count', 'error_count',
|
|
67
|
-
'request_0_byte_count', 'response_0_byte_count'.
|
|
68
|
-
:param host_name: string, the name of the host.
|
|
69
|
-
|
|
70
|
-
:return: None.
|
|
71
|
-
"""
|
|
72
|
-
|
|
73
|
-
# Aggregate daily domain hits.
|
|
74
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
75
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
76
|
-
# Iterate from first day to the last day.
|
|
77
|
-
for day in range(0, last_day + 1):
|
|
78
|
-
daily_stats[host_type][feature][host_name][day] = 0
|
|
79
|
-
|
|
80
|
-
# Add count to current day.
|
|
81
|
-
daily_stats[host_type][feature][host_name][current_day] += 1
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def add_to_list_to_daily_stats(
|
|
85
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
|
|
86
|
-
size: float) -> None:
|
|
87
|
-
"""
|
|
88
|
-
This function adds the 'size' to the 'feature' list of the current day in the daily stats.
|
|
89
|
-
|
|
90
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
91
|
-
:param current_day: integer, the current day number.
|
|
92
|
-
:param last_day: integer, the last day number.
|
|
93
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
94
|
-
:param feature: string, the feature to add the count to. Can be: 'request_sizes_list', 'response_sizes_list',
|
|
95
|
-
'request_sizes_no_0_bytes_list', 'response_sizes_no_0_bytes_list'.
|
|
96
|
-
:param host_name: string, the name of the host.
|
|
97
|
-
:param size: float, the size in bytes to add to the list.
|
|
98
|
-
|
|
99
|
-
:return: None.
|
|
100
|
-
"""
|
|
101
|
-
|
|
102
|
-
# Aggregate daily domain hits.
|
|
103
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
104
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
105
|
-
# Iterate from first day to the last day.
|
|
106
|
-
for day in range(0, last_day + 1):
|
|
107
|
-
daily_stats[host_type][feature][host_name][day] = []
|
|
108
|
-
|
|
109
|
-
# Add count to current day.
|
|
110
|
-
daily_stats[host_type][feature][host_name][current_day].append(size)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def add_to_average_to_daily_stats(
|
|
114
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
|
|
115
|
-
list_of_sizes: list) -> None:
|
|
116
|
-
"""
|
|
117
|
-
This function adds the average size in bytes calculated from the 'list_of_sizes' to the 'feature' of the current
|
|
118
|
-
day in the daily stats.
|
|
119
|
-
|
|
120
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
121
|
-
:param current_day: integer, the current day number.
|
|
122
|
-
:param last_day: integer, the last day number.
|
|
123
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
124
|
-
:param feature: string, the feature to add the count to. Can be: 'average_request_size', 'average_response_size',
|
|
125
|
-
'average_request_size_no_0_bytes', 'average_response_size_no_0_bytes'.
|
|
126
|
-
:param host_name: string, the name of the host.
|
|
127
|
-
:param list_of_sizes: list, the list of sizes to calculate the average from.
|
|
128
|
-
|
|
129
|
-
:return: None.
|
|
130
|
-
"""
|
|
131
|
-
|
|
132
|
-
# Aggregate daily domain hits.
|
|
133
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
134
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
135
|
-
# Iterate from first day to the last day.
|
|
136
|
-
for day in range(0, last_day + 1):
|
|
137
|
-
daily_stats[host_type][feature][host_name][day] = 0
|
|
138
|
-
|
|
139
|
-
# If the list of size is empty, add 0 to the average, since we cannot divide by 0.
|
|
140
|
-
if len(list_of_sizes) == 0:
|
|
141
|
-
daily_stats[host_type][feature][host_name][current_day] = 0
|
|
142
|
-
else:
|
|
143
|
-
daily_stats[host_type][feature][host_name][current_day] = sum(list_of_sizes) / len(list_of_sizes)
|
|
14
|
+
STATISTICS_FILE_NAME: str = 'statistics.csv'
|
|
144
15
|
|
|
145
16
|
|
|
146
17
|
def analyze(main_file_path: str):
|
|
@@ -172,9 +43,9 @@ def analyze(main_file_path: str):
|
|
|
172
43
|
'subdomain': {'total_count': {}, 'normal_count': {}, 'error_count': {}}
|
|
173
44
|
}
|
|
174
45
|
daily_stats: dict = {
|
|
175
|
-
'domain': create_empty_features_dict(),
|
|
176
|
-
'subdomain': create_empty_features_dict(),
|
|
177
|
-
'url_no_parameters': create_empty_features_dict()
|
|
46
|
+
'domain': analyzer_helper.create_empty_features_dict(),
|
|
47
|
+
'subdomain': analyzer_helper.create_empty_features_dict(),
|
|
48
|
+
'url_no_parameters': analyzer_helper.create_empty_features_dict()
|
|
178
49
|
}
|
|
179
50
|
|
|
180
51
|
# Start the main loop.
|
|
@@ -195,7 +66,7 @@ def analyze(main_file_path: str):
|
|
|
195
66
|
|
|
196
67
|
# Find the last day number. If 'break_after_lines' is specified, the loop will stop after the specified line.
|
|
197
68
|
if not last_day_number:
|
|
198
|
-
last_day_number = get_the_last_day_number(statistics_content, break_after_lines)
|
|
69
|
+
last_day_number = analyzer_helper.get_the_last_day_number(statistics_content, break_after_lines)
|
|
199
70
|
|
|
200
71
|
if break_after_lines:
|
|
201
72
|
if line_index == break_after_lines:
|
|
@@ -295,87 +166,87 @@ def analyze(main_file_path: str):
|
|
|
295
166
|
day_number = datetimes.get_difference_between_dates_in_days(start_time, request_time)
|
|
296
167
|
|
|
297
168
|
# Add 1 to the total count of the current day.
|
|
298
|
-
add_to_count_to_daily_stats(
|
|
169
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
299
170
|
daily_stats, day_number, last_day_number, 'domain', 'total_count', main_domain)
|
|
300
|
-
add_to_count_to_daily_stats(
|
|
171
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
301
172
|
daily_stats, day_number, last_day_number, 'subdomain', 'total_count', subdomain)
|
|
302
|
-
add_to_count_to_daily_stats(
|
|
173
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
303
174
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'total_count', url_no_parameters)
|
|
304
175
|
|
|
305
176
|
# Handle line if it has error.
|
|
306
177
|
if line['error'] != '':
|
|
307
|
-
add_to_count_to_daily_stats(
|
|
178
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
308
179
|
daily_stats, day_number, last_day_number, 'domain', 'error_count', main_domain)
|
|
309
|
-
add_to_count_to_daily_stats(
|
|
180
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
310
181
|
daily_stats, day_number, last_day_number, 'subdomain', 'error_count', subdomain)
|
|
311
|
-
add_to_count_to_daily_stats(
|
|
182
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
312
183
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'error_count', url_no_parameters)
|
|
313
184
|
else:
|
|
314
|
-
add_to_count_to_daily_stats(
|
|
185
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
315
186
|
daily_stats, day_number, last_day_number, 'domain', 'normal_count', main_domain)
|
|
316
|
-
add_to_count_to_daily_stats(
|
|
187
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
317
188
|
daily_stats, day_number, last_day_number, 'subdomain', 'normal_count', subdomain)
|
|
318
|
-
add_to_count_to_daily_stats(
|
|
189
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
319
190
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'normal_count', url_no_parameters)
|
|
320
191
|
|
|
321
192
|
if request_size == 0:
|
|
322
|
-
add_to_count_to_daily_stats(
|
|
193
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
323
194
|
daily_stats, day_number, last_day_number, 'domain', 'request_0_byte_count',
|
|
324
195
|
main_domain)
|
|
325
|
-
add_to_count_to_daily_stats(
|
|
196
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
326
197
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_0_byte_count',
|
|
327
198
|
subdomain)
|
|
328
|
-
add_to_count_to_daily_stats(
|
|
199
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
329
200
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_0_byte_count',
|
|
330
201
|
url_no_parameters)
|
|
331
202
|
|
|
332
203
|
if response_size == 0:
|
|
333
|
-
add_to_count_to_daily_stats(
|
|
204
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
334
205
|
daily_stats, day_number, last_day_number, 'domain', 'response_0_byte_count',
|
|
335
206
|
main_domain)
|
|
336
|
-
add_to_count_to_daily_stats(
|
|
207
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
337
208
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_0_byte_count',
|
|
338
209
|
subdomain)
|
|
339
|
-
add_to_count_to_daily_stats(
|
|
210
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
340
211
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_0_byte_count',
|
|
341
212
|
url_no_parameters)
|
|
342
213
|
|
|
343
214
|
if request_size is not None and response_size is not None:
|
|
344
|
-
add_to_list_to_daily_stats(
|
|
215
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
345
216
|
daily_stats, day_number, last_day_number, 'domain', 'request_sizes_list', main_domain, request_size)
|
|
346
|
-
add_to_list_to_daily_stats(
|
|
217
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
347
218
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_list', subdomain, request_size)
|
|
348
|
-
add_to_list_to_daily_stats(
|
|
219
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
349
220
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_list', url_no_parameters,
|
|
350
221
|
request_size)
|
|
351
222
|
|
|
352
|
-
add_to_list_to_daily_stats(
|
|
223
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
353
224
|
daily_stats, day_number, last_day_number, 'domain', 'response_sizes_list', main_domain, response_size)
|
|
354
|
-
add_to_list_to_daily_stats(
|
|
225
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
355
226
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_list', subdomain, response_size)
|
|
356
|
-
add_to_list_to_daily_stats(
|
|
227
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
357
228
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_list', url_no_parameters,
|
|
358
229
|
response_size)
|
|
359
230
|
|
|
360
231
|
if request_size != 0 and request_size is not None:
|
|
361
|
-
add_to_list_to_daily_stats(
|
|
232
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
362
233
|
daily_stats, day_number, last_day_number, 'domain', 'request_sizes_no_0_bytes_list',
|
|
363
234
|
main_domain, request_size)
|
|
364
|
-
add_to_list_to_daily_stats(
|
|
235
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
365
236
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_no_0_bytes_list',
|
|
366
237
|
subdomain, request_size)
|
|
367
|
-
add_to_list_to_daily_stats(
|
|
238
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
368
239
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_no_0_bytes_list',
|
|
369
240
|
url_no_parameters, request_size)
|
|
370
241
|
|
|
371
242
|
if response_size != 0 and response_size is not None:
|
|
372
|
-
add_to_list_to_daily_stats(
|
|
243
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
373
244
|
daily_stats, day_number, last_day_number, 'domain', 'response_sizes_no_0_bytes_list',
|
|
374
245
|
main_domain, response_size)
|
|
375
|
-
add_to_list_to_daily_stats(
|
|
246
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
376
247
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_no_0_bytes_list',
|
|
377
248
|
subdomain, response_size)
|
|
378
|
-
add_to_list_to_daily_stats(
|
|
249
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
379
250
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_no_0_bytes_list',
|
|
380
251
|
url_no_parameters, response_size)
|
|
381
252
|
|
|
@@ -397,7 +268,7 @@ def analyze(main_file_path: str):
|
|
|
397
268
|
|
|
398
269
|
for host_name, days in hosts.items():
|
|
399
270
|
for day, sizes in days.items():
|
|
400
|
-
add_to_average_to_daily_stats(
|
|
271
|
+
analyzer_helper.add_to_average_to_daily_stats(
|
|
401
272
|
daily_stats, day, last_day_number, host_type, feature_name, host_name, sizes)
|
|
402
273
|
|
|
403
274
|
# Sorting overall stats.
|
|
@@ -473,324 +344,37 @@ def analyze(main_file_path: str):
|
|
|
473
344
|
# ======================================================================================================================
|
|
474
345
|
|
|
475
346
|
|
|
476
|
-
def calculate_moving_average(
|
|
477
|
-
|
|
478
|
-
moving_average_window_days,
|
|
479
|
-
top_bottom_deviation_percentage: float,
|
|
480
|
-
print_kwargs: dict = None
|
|
481
|
-
) -> list:
|
|
482
|
-
"""
|
|
483
|
-
This function calculates the moving average of the daily statistics.
|
|
484
|
-
|
|
485
|
-
:param file_path: string, the path to the 'statistics.csv' file.
|
|
486
|
-
:param moving_average_window_days: integer, the window size for the moving average.
|
|
487
|
-
:param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
|
|
488
|
-
bottom.
|
|
489
|
-
:param print_kwargs: dict, the print_api arguments.
|
|
490
|
-
"""
|
|
491
|
-
|
|
492
|
-
date_pattern: str = consts.DEFAULT_ROTATING_SUFFIXES_FROM_WHEN['midnight']
|
|
493
|
-
|
|
494
|
-
# Get all the file paths and their midnight rotations.
|
|
495
|
-
logs_paths: list = reading.get_logs_paths(
|
|
496
|
-
log_file_path=file_path,
|
|
497
|
-
date_pattern=date_pattern
|
|
498
|
-
)
|
|
499
|
-
|
|
500
|
-
statistics_content: dict = {}
|
|
501
|
-
# Read each file to its day.
|
|
502
|
-
for log_path_dict in logs_paths:
|
|
503
|
-
date_string = log_path_dict['date_string']
|
|
504
|
-
statistics_content[date_string] = {}
|
|
505
|
-
|
|
506
|
-
statistics_content[date_string]['file'] = log_path_dict
|
|
507
|
-
|
|
508
|
-
log_file_content, log_file_header = (
|
|
509
|
-
csvs.read_csv_to_list_of_dicts_by_header(log_path_dict['file_path'], **(print_kwargs or {})))
|
|
510
|
-
statistics_content[date_string]['content'] = log_file_content
|
|
511
|
-
statistics_content[date_string]['header'] = log_file_header
|
|
512
|
-
|
|
513
|
-
statistics_content[date_string]['content_no_errors'] = get_content_without_errors(log_file_content)
|
|
514
|
-
|
|
515
|
-
# Get the data dictionary from the statistics content.
|
|
516
|
-
statistics_content[date_string]['statistics_daily'] = compute_statistics_from_content(
|
|
517
|
-
statistics_content[date_string]['content_no_errors']
|
|
518
|
-
)
|
|
519
|
-
|
|
520
|
-
moving_average_dict: dict = compute_moving_averages_from_average_statistics(
|
|
521
|
-
statistics_content,
|
|
522
|
-
moving_average_window_days
|
|
523
|
-
)
|
|
524
|
-
|
|
525
|
-
# Add the moving average to the statistics content.
|
|
526
|
-
for day, day_dict in statistics_content.items():
|
|
527
|
-
try:
|
|
528
|
-
day_dict['moving_average'] = moving_average_dict[day]
|
|
529
|
-
except KeyError:
|
|
530
|
-
day_dict['moving_average'] = {}
|
|
531
|
-
|
|
532
|
-
# Find deviation from the moving average to the bottom or top by specified percentage.
|
|
533
|
-
deviation_list: list = find_deviation_from_moving_average(
|
|
534
|
-
statistics_content, top_bottom_deviation_percentage)
|
|
535
|
-
|
|
536
|
-
return deviation_list
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
def get_content_without_errors(content: list) -> list:
|
|
540
|
-
"""
|
|
541
|
-
This function gets the 'statistics.csv' file content without errors from the 'content' list.
|
|
542
|
-
|
|
543
|
-
:param content: list, the content list.
|
|
544
|
-
:return: list, the content without errors.
|
|
545
|
-
"""
|
|
546
|
-
|
|
547
|
-
traffic_statistics_without_errors: list = []
|
|
548
|
-
for line in content:
|
|
549
|
-
# Skip empty lines, headers and errors.
|
|
550
|
-
if line['host'] == 'host' or line['command'] == '':
|
|
551
|
-
continue
|
|
552
|
-
|
|
553
|
-
traffic_statistics_without_errors.append(line)
|
|
554
|
-
|
|
555
|
-
return traffic_statistics_without_errors
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
def get_data_dict_from_statistics_content(content: list) -> dict:
|
|
559
|
-
"""
|
|
560
|
-
This function gets the data dictionary from the 'statistics.csv' file content.
|
|
561
|
-
|
|
562
|
-
:param content: list, the content list.
|
|
563
|
-
:return: dict, the data dictionary.
|
|
564
|
-
"""
|
|
565
|
-
|
|
566
|
-
hosts_requests_responses: dict = {}
|
|
567
|
-
for line in content:
|
|
568
|
-
# If subdomain is not in the dictionary, add it.
|
|
569
|
-
if line['host'] not in hosts_requests_responses:
|
|
570
|
-
hosts_requests_responses[line['host']] = {
|
|
571
|
-
'request_sizes': [],
|
|
572
|
-
'response_sizes': []
|
|
573
|
-
}
|
|
574
|
-
|
|
575
|
-
# Append the sizes.
|
|
576
|
-
try:
|
|
577
|
-
hosts_requests_responses[line['host']]['request_sizes'].append(int(line['request_size_bytes']))
|
|
578
|
-
hosts_requests_responses[line['host']]['response_sizes'].append(
|
|
579
|
-
int(line['response_size_bytes']))
|
|
580
|
-
except ValueError:
|
|
581
|
-
print_api(line, color='yellow')
|
|
582
|
-
raise
|
|
583
|
-
|
|
584
|
-
return hosts_requests_responses
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
def compute_statistics_from_data_dict(data_dict: dict):
|
|
588
|
-
"""
|
|
589
|
-
This function computes the statistics from the data dictionary.
|
|
590
|
-
|
|
591
|
-
:param data_dict: dict, the data dictionary.
|
|
592
|
-
:return: dict, the statistics dictionary.
|
|
593
|
-
"""
|
|
594
|
-
|
|
595
|
-
for host, host_dict in data_dict.items():
|
|
596
|
-
count = len(host_dict['request_sizes'])
|
|
597
|
-
avg_request_size = statistics.mean(host_dict['request_sizes']) if count > 0 else 0
|
|
598
|
-
median_request_size = statistics.median(host_dict['request_sizes']) if count > 0 else 0
|
|
599
|
-
avg_response_size = statistics.mean(host_dict['response_sizes']) if count > 0 else 0
|
|
600
|
-
median_response_size = statistics.median(host_dict['response_sizes']) if count > 0 else 0
|
|
601
|
-
|
|
602
|
-
data_dict[host]['count'] = count
|
|
603
|
-
data_dict[host]['avg_request_size'] = avg_request_size
|
|
604
|
-
data_dict[host]['median_request_size'] = median_request_size
|
|
605
|
-
data_dict[host]['avg_response_size'] = avg_response_size
|
|
606
|
-
data_dict[host]['median_response_size'] = median_response_size
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
def compute_statistics_from_content(content: list):
|
|
610
|
-
"""
|
|
611
|
-
This function computes the statistics from the 'statistics.csv' file content.
|
|
612
|
-
|
|
613
|
-
:param content: list, the content list.
|
|
614
|
-
:return: dict, the statistics dictionary.
|
|
615
|
-
"""
|
|
616
|
-
|
|
617
|
-
hosts_requests_responses: dict = get_data_dict_from_statistics_content(content)
|
|
618
|
-
compute_statistics_from_data_dict(hosts_requests_responses)
|
|
619
|
-
|
|
620
|
-
return hosts_requests_responses
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
def compute_moving_averages_from_average_statistics(
|
|
624
|
-
average_statistics_dict: dict,
|
|
625
|
-
moving_average_window_days: int
|
|
626
|
-
):
|
|
627
|
-
"""
|
|
628
|
-
This function computes the moving averages from the average statistics dictionary.
|
|
629
|
-
|
|
630
|
-
:param average_statistics_dict: dict, the average statistics dictionary.
|
|
631
|
-
:param moving_average_window_days: integer, the window size for the moving average.
|
|
632
|
-
:return: dict, the moving averages dictionary.
|
|
633
|
-
"""
|
|
634
|
-
|
|
635
|
-
moving_average: dict = {}
|
|
636
|
-
for day_index, (day, day_dict) in enumerate(average_statistics_dict.items()):
|
|
637
|
-
current_day = day_index + 1
|
|
638
|
-
if current_day < moving_average_window_days:
|
|
639
|
-
continue
|
|
640
|
-
|
|
641
|
-
# Create list of the previous 'moving_average_window_days' days.
|
|
642
|
-
previous_days_content_list = (
|
|
643
|
-
list(average_statistics_dict.values()))[current_day-moving_average_window_days:current_day]
|
|
644
|
-
|
|
645
|
-
# Compute the moving averages.
|
|
646
|
-
moving_average[day] = compute_average_for_current_day_from_past_x_days(previous_days_content_list)
|
|
647
|
-
|
|
648
|
-
return moving_average
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
def compute_average_for_current_day_from_past_x_days(previous_days_content_list: list) -> dict:
|
|
652
|
-
"""
|
|
653
|
-
This function computes the average for the current day from the past x days.
|
|
654
|
-
|
|
655
|
-
:param previous_days_content_list: list, the list of the previous days content.
|
|
656
|
-
:return: dict, the average dictionary.
|
|
657
|
-
"""
|
|
658
|
-
|
|
659
|
-
moving_average: dict = {}
|
|
660
|
-
for entry in previous_days_content_list:
|
|
661
|
-
statistics_daily = entry['statistics_daily']
|
|
662
|
-
for host, host_dict in statistics_daily.items():
|
|
663
|
-
if host not in moving_average:
|
|
664
|
-
moving_average[host] = {
|
|
665
|
-
'counts': [],
|
|
666
|
-
'avg_request_sizes': [],
|
|
667
|
-
'avg_response_sizes': [],
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
moving_average[host]['counts'].append(int(host_dict['count']))
|
|
671
|
-
moving_average[host]['avg_request_sizes'].append(float(host_dict['avg_request_size']))
|
|
672
|
-
moving_average[host]['avg_response_sizes'].append(float(host_dict['avg_response_size']))
|
|
673
|
-
|
|
674
|
-
# Compute the moving average.
|
|
675
|
-
moving_average_results: dict = {}
|
|
676
|
-
for host, host_dict in moving_average.items():
|
|
677
|
-
ma_count = statistics.mean(host_dict['counts'])
|
|
678
|
-
ma_request_size = statistics.mean(host_dict['avg_request_sizes'])
|
|
679
|
-
ma_response_size = statistics.mean(host_dict['avg_response_sizes'])
|
|
680
|
-
|
|
681
|
-
moving_average_results[host] = {
|
|
682
|
-
'ma_count': ma_count,
|
|
683
|
-
'ma_request_size': ma_request_size,
|
|
684
|
-
'ma_response_size': ma_response_size,
|
|
685
|
-
'counts': host_dict['counts'],
|
|
686
|
-
'avg_request_sizes': host_dict['avg_request_sizes'],
|
|
687
|
-
'avg_response_sizes': host_dict['avg_response_sizes']
|
|
688
|
-
}
|
|
689
|
-
|
|
690
|
-
return moving_average_results
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
def find_deviation_from_moving_average(
|
|
694
|
-
statistics_content: dict,
|
|
695
|
-
top_bottom_deviation_percentage: float
|
|
696
|
-
) -> list:
|
|
697
|
-
"""
|
|
698
|
-
This function finds the deviation from the moving average to the bottom or top by specified percentage.
|
|
699
|
-
|
|
700
|
-
:param statistics_content: dict, the statistics content dictionary.
|
|
701
|
-
:param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
|
|
702
|
-
bottom.
|
|
703
|
-
:return: list, the deviation list.
|
|
704
|
-
"""
|
|
705
|
-
|
|
706
|
-
def _check_deviation(
|
|
707
|
-
check_type: Literal['count', 'avg_request_size', 'avg_response_size'],
|
|
708
|
-
ma_check_type: Literal['ma_count', 'ma_request_size', 'ma_response_size'],
|
|
709
|
-
day_statistics_content_dict: dict,
|
|
710
|
-
moving_averages_dict: dict
|
|
711
|
-
):
|
|
712
|
-
"""
|
|
713
|
-
This function checks the deviation for the host.
|
|
714
|
-
"""
|
|
715
|
-
|
|
716
|
-
nonlocal message
|
|
717
|
-
|
|
718
|
-
host_moving_average_by_type = moving_averages_dict[host][ma_check_type]
|
|
719
|
-
check_type_moving_by_percent = (
|
|
720
|
-
host_moving_average_by_type * top_bottom_deviation_percentage)
|
|
721
|
-
check_type_moving_above = host_moving_average_by_type + check_type_moving_by_percent
|
|
722
|
-
check_type_moving_below = host_moving_average_by_type - check_type_moving_by_percent
|
|
723
|
-
|
|
724
|
-
deviation_type = None
|
|
725
|
-
if day_statistics_content_dict[check_type] > check_type_moving_above:
|
|
726
|
-
deviation_type = 'above'
|
|
727
|
-
elif day_statistics_content_dict[check_type] < check_type_moving_below:
|
|
728
|
-
deviation_type = 'below'
|
|
729
|
-
|
|
730
|
-
if deviation_type:
|
|
731
|
-
message = f'[{check_type}] is [{deviation_type}] the moving average.'
|
|
732
|
-
deviation_list.append({
|
|
733
|
-
'day': day,
|
|
734
|
-
'host': host,
|
|
735
|
-
'message': message,
|
|
736
|
-
'value': day_statistics_content_dict[check_type],
|
|
737
|
-
'ma_value': host_moving_average_by_type,
|
|
738
|
-
'check_type': check_type,
|
|
739
|
-
'percentage': top_bottom_deviation_percentage,
|
|
740
|
-
'ma_value_checked': check_type_moving_above,
|
|
741
|
-
'deviation_type': deviation_type,
|
|
742
|
-
'data': day_statistics_content_dict,
|
|
743
|
-
'ma_data': moving_averages_dict[host]
|
|
744
|
-
})
|
|
745
|
-
|
|
746
|
-
deviation_list: list = []
|
|
747
|
-
for day_index, (day, day_dict) in enumerate(statistics_content.items()):
|
|
748
|
-
# If it's the first day, there is no previous day moving average.
|
|
749
|
-
if day_index == 0:
|
|
750
|
-
previous_day_moving_average_dict = {}
|
|
751
|
-
else:
|
|
752
|
-
previous_day_moving_average_dict = list(statistics_content.values())[day_index-1].get('moving_average', {})
|
|
753
|
-
|
|
754
|
-
# If there is no moving average for previous day continue to the next day.
|
|
755
|
-
if not previous_day_moving_average_dict:
|
|
756
|
-
continue
|
|
757
|
-
|
|
758
|
-
for host, host_dict in day_dict['statistics_daily'].items():
|
|
759
|
-
# If the host is not in the moving averages, then this is clear deviation.
|
|
760
|
-
# It means that in the current day, there were no requests for this host.
|
|
761
|
-
if host not in previous_day_moving_average_dict:
|
|
762
|
-
message = f'Host not in the moving averages: {host}'
|
|
763
|
-
deviation_list.append({
|
|
764
|
-
'day': day,
|
|
765
|
-
'host': host,
|
|
766
|
-
'data': host_dict,
|
|
767
|
-
'message': message,
|
|
768
|
-
'type': 'clear'
|
|
769
|
-
})
|
|
770
|
-
continue
|
|
771
|
-
|
|
772
|
-
_check_deviation(
|
|
773
|
-
'count', 'ma_count', host_dict, previous_day_moving_average_dict)
|
|
774
|
-
_check_deviation(
|
|
775
|
-
'avg_request_size', 'ma_request_size', host_dict, previous_day_moving_average_dict)
|
|
776
|
-
_check_deviation(
|
|
777
|
-
'avg_response_size', 'ma_response_size', host_dict, previous_day_moving_average_dict)
|
|
778
|
-
|
|
779
|
-
return deviation_list
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
def moving_average_calculator_main(
|
|
783
|
-
statistics_file_path: str,
|
|
347
|
+
def deviation_calculator_by_moving_average_main(
|
|
348
|
+
statistics_file_directory: str,
|
|
784
349
|
moving_average_window_days: int,
|
|
785
350
|
top_bottom_deviation_percentage: float,
|
|
351
|
+
get_deviation_for_last_day_only: bool = False,
|
|
352
|
+
summary: bool = False,
|
|
786
353
|
output_json_file_path: str = None
|
|
787
354
|
) -> Union[list, None]:
|
|
788
355
|
"""
|
|
789
356
|
This function is the main function for the moving average calculator.
|
|
790
357
|
|
|
791
|
-
:param
|
|
358
|
+
:param statistics_file_directory: string, the directory where 'statistics.csv' file resides.
|
|
359
|
+
Also, all the rotated files like: statistics_2021-01-01.csv, statistics_2021-01-02.csv, etc.
|
|
360
|
+
These will be analyzed in the order of the date in the file name.
|
|
792
361
|
:param moving_average_window_days: integer, the moving average window days.
|
|
793
362
|
:param top_bottom_deviation_percentage: float, the top bottom deviation percentage. Example: 0.1 for 10%.
|
|
363
|
+
:param get_deviation_for_last_day_only: bool, if True, only the last day will be analyzed.
|
|
364
|
+
Example: With 'moving_average_window_days=5', the last 6 days will be analyzed.
|
|
365
|
+
5 days for moving average and the last day for deviation.
|
|
366
|
+
File names example:
|
|
367
|
+
statistics_2021-01-01.csv
|
|
368
|
+
statistics_2021-01-02.csv
|
|
369
|
+
statistics_2021-01-03.csv
|
|
370
|
+
statistics_2021-01-04.csv
|
|
371
|
+
statistics_2021-01-05.csv
|
|
372
|
+
statistics_2021-01-06.csv
|
|
373
|
+
Files 01 to 05 will be used for moving average and the file 06 for deviation.
|
|
374
|
+
Meaning the average calculated for 2021-01-06 will be compared to the values moving average of 2021-01-01
|
|
375
|
+
to 2021-01-05.
|
|
376
|
+
:param summary: bool, if True, Only the summary will be generated without all the numbers that were used
|
|
377
|
+
to calculate the averages and the moving average data.
|
|
794
378
|
:param output_json_file_path: string, if None, no json file will be written.
|
|
795
379
|
-----------------------------
|
|
796
380
|
:return: the deviation list of dicts.
|
|
@@ -813,6 +397,8 @@ def moving_average_calculator_main(
|
|
|
813
397
|
sys.exit(main())
|
|
814
398
|
"""
|
|
815
399
|
|
|
400
|
+
statistics_file_path: str = f'{statistics_file_directory}{os.sep}{STATISTICS_FILE_NAME}'
|
|
401
|
+
|
|
816
402
|
def convert_data_value_to_string(value_key: str, list_index: int) -> None:
|
|
817
403
|
deviation_list[list_index]['data'][value_key] = json.dumps(deviation_list[list_index]['data'][value_key])
|
|
818
404
|
|
|
@@ -820,10 +406,11 @@ def moving_average_calculator_main(
|
|
|
820
406
|
if value_key in deviation_list[list_index]:
|
|
821
407
|
deviation_list[list_index][value_key] = json.dumps(deviation_list[list_index][value_key])
|
|
822
408
|
|
|
823
|
-
deviation_list = calculate_moving_average(
|
|
409
|
+
deviation_list = moving_average_helper.calculate_moving_average(
|
|
824
410
|
statistics_file_path,
|
|
825
411
|
moving_average_window_days,
|
|
826
|
-
top_bottom_deviation_percentage
|
|
412
|
+
top_bottom_deviation_percentage,
|
|
413
|
+
get_deviation_for_last_day_only
|
|
827
414
|
)
|
|
828
415
|
|
|
829
416
|
if deviation_list:
|
|
@@ -836,6 +423,20 @@ def moving_average_calculator_main(
|
|
|
836
423
|
print_api(f'Deviation Found, saving to file: {output_json_file_path}', color='blue')
|
|
837
424
|
jsons.write_json_file(deviation_list, output_json_file_path, use_default_indent=True)
|
|
838
425
|
|
|
426
|
+
if summary:
|
|
427
|
+
summary_deviation_list: list = []
|
|
428
|
+
for deviation in deviation_list:
|
|
429
|
+
summary_deviation_list.append({
|
|
430
|
+
'day': deviation['day'],
|
|
431
|
+
'host': deviation['host'],
|
|
432
|
+
'message': deviation['message'],
|
|
433
|
+
'value': deviation['value'],
|
|
434
|
+
'ma_value': deviation['ma_value'],
|
|
435
|
+
'total_entries_averaged': deviation['data']['count']
|
|
436
|
+
})
|
|
437
|
+
|
|
438
|
+
deviation_list = summary_deviation_list
|
|
439
|
+
|
|
839
440
|
return deviation_list
|
|
840
441
|
|
|
841
442
|
return None
|