atomicshop 2.14.11__py3-none-any.whl → 2.14.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/config_init.py +1 -1
- atomicshop/etws/traces/trace_dns.py +5 -1
- atomicshop/filesystem.py +3 -3
- atomicshop/mitm/import_config.py +3 -3
- atomicshop/mitm/statistic_analyzer.py +94 -492
- atomicshop/mitm/statistic_analyzer_helper/__init__.py +0 -0
- atomicshop/mitm/statistic_analyzer_helper/analyzer_helper.py +136 -0
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +330 -0
- atomicshop/question_answer_engine.py +2 -2
- atomicshop/wrappers/elasticsearchw/infrastructure.py +1 -1
- atomicshop/wrappers/socketw/socket_client.py +1 -1
- {atomicshop-2.14.11.dist-info → atomicshop-2.14.13.dist-info}/METADATA +1 -1
- {atomicshop-2.14.11.dist-info → atomicshop-2.14.13.dist-info}/RECORD +17 -14
- {atomicshop-2.14.11.dist-info → atomicshop-2.14.13.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.14.11.dist-info → atomicshop-2.14.13.dist-info}/WHEEL +0 -0
- {atomicshop-2.14.11.dist-info → atomicshop-2.14.13.dist-info}/top_level.txt +0 -0
|
@@ -1,147 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import datetime
|
|
3
|
-
import statistics
|
|
4
3
|
import json
|
|
5
|
-
from typing import Literal
|
|
4
|
+
from typing import Union
|
|
6
5
|
|
|
6
|
+
from .statistic_analyzer_helper import analyzer_helper, moving_average_helper
|
|
7
7
|
from .. import filesystem, domains, datetimes, urls
|
|
8
8
|
from ..basics import dicts
|
|
9
|
-
from ..file_io import tomls, xlsxs, csvs
|
|
9
|
+
from ..file_io import tomls, xlsxs, jsons
|
|
10
10
|
from ..wrappers.loggingw import reading
|
|
11
11
|
from ..print_api import print_api
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
"""
|
|
16
|
-
This function gets the last day number from the statistics content.
|
|
17
|
-
|
|
18
|
-
:param statistics_content: list, of lines in the statistics content.
|
|
19
|
-
:param stop_after_lines: integer, if specified, the function will stop after the specified number of lines.
|
|
20
|
-
:return: integer, the last day number.
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
last_day_number = None
|
|
24
|
-
start_time_temp = None
|
|
25
|
-
for line_index, line in enumerate(statistics_content):
|
|
26
|
-
try:
|
|
27
|
-
request_time = datetime.datetime.strptime(line['request_time_sent'], '%Y-%m-%d %H:%M:%S.%f')
|
|
28
|
-
except ValueError:
|
|
29
|
-
continue
|
|
30
|
-
|
|
31
|
-
if not start_time_temp:
|
|
32
|
-
start_time_temp = request_time
|
|
33
|
-
|
|
34
|
-
if stop_after_lines:
|
|
35
|
-
if line_index == stop_after_lines:
|
|
36
|
-
break
|
|
37
|
-
|
|
38
|
-
last_day_number = datetimes.get_difference_between_dates_in_days(start_time_temp, request_time)
|
|
39
|
-
return last_day_number
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def create_empty_features_dict() -> dict:
|
|
43
|
-
"""
|
|
44
|
-
This function creates an empty dictionary for the daily stats. This should be initiated for each 'host_type' of:
|
|
45
|
-
'domain', 'subdomain', 'url_no_parameters'.
|
|
46
|
-
:return: dict
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
return {
|
|
50
|
-
'total_count': {}, 'normal_count': {}, 'error_count': {},
|
|
51
|
-
'request_0_byte_count': {}, 'response_0_byte_count': {},
|
|
52
|
-
'request_sizes_list': {}, 'response_sizes_list': {},
|
|
53
|
-
'request_sizes_no_0_bytes_list': {}, 'response_sizes_no_0_bytes_list': {},
|
|
54
|
-
'average_request_size': {}, 'average_response_size': {},
|
|
55
|
-
'average_request_size_no_0_bytes': {}, 'average_response_size_no_0_bytes': {}}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def add_to_count_to_daily_stats(
|
|
59
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str) -> None:
|
|
60
|
-
"""
|
|
61
|
-
This function adds 1 to the 'count' feature of the current day in the daily stats.
|
|
62
|
-
|
|
63
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
64
|
-
:param current_day: integer, the current day number.
|
|
65
|
-
:param last_day: integer, the last day number.
|
|
66
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
67
|
-
:param feature: string, the feature to add the count to. Can be: 'total_count', 'normal_count', 'error_count',
|
|
68
|
-
'request_0_byte_count', 'response_0_byte_count'.
|
|
69
|
-
:param host_name: string, the name of the host.
|
|
70
|
-
|
|
71
|
-
:return: None.
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
# Aggregate daily domain hits.
|
|
75
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
76
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
77
|
-
# Iterate from first day to the last day.
|
|
78
|
-
for day in range(0, last_day + 1):
|
|
79
|
-
daily_stats[host_type][feature][host_name][day] = 0
|
|
80
|
-
|
|
81
|
-
# Add count to current day.
|
|
82
|
-
daily_stats[host_type][feature][host_name][current_day] += 1
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def add_to_list_to_daily_stats(
|
|
86
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
|
|
87
|
-
size: float) -> None:
|
|
88
|
-
"""
|
|
89
|
-
This function adds the 'size' to the 'feature' list of the current day in the daily stats.
|
|
90
|
-
|
|
91
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
92
|
-
:param current_day: integer, the current day number.
|
|
93
|
-
:param last_day: integer, the last day number.
|
|
94
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
95
|
-
:param feature: string, the feature to add the count to. Can be: 'request_sizes_list', 'response_sizes_list',
|
|
96
|
-
'request_sizes_no_0_bytes_list', 'response_sizes_no_0_bytes_list'.
|
|
97
|
-
:param host_name: string, the name of the host.
|
|
98
|
-
:param size: float, the size in bytes to add to the list.
|
|
99
|
-
|
|
100
|
-
:return: None.
|
|
101
|
-
"""
|
|
102
|
-
|
|
103
|
-
# Aggregate daily domain hits.
|
|
104
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
105
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
106
|
-
# Iterate from first day to the last day.
|
|
107
|
-
for day in range(0, last_day + 1):
|
|
108
|
-
daily_stats[host_type][feature][host_name][day] = []
|
|
109
|
-
|
|
110
|
-
# Add count to current day.
|
|
111
|
-
daily_stats[host_type][feature][host_name][current_day].append(size)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def add_to_average_to_daily_stats(
|
|
115
|
-
daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
|
|
116
|
-
list_of_sizes: list) -> None:
|
|
117
|
-
"""
|
|
118
|
-
This function adds the average size in bytes calculated from the 'list_of_sizes' to the 'feature' of the current
|
|
119
|
-
day in the daily stats.
|
|
120
|
-
|
|
121
|
-
:param daily_stats: dict, the daily statistics dict.
|
|
122
|
-
:param current_day: integer, the current day number.
|
|
123
|
-
:param last_day: integer, the last day number.
|
|
124
|
-
:param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
|
|
125
|
-
:param feature: string, the feature to add the count to. Can be: 'average_request_size', 'average_response_size',
|
|
126
|
-
'average_request_size_no_0_bytes', 'average_response_size_no_0_bytes'.
|
|
127
|
-
:param host_name: string, the name of the host.
|
|
128
|
-
:param list_of_sizes: list, the list of sizes to calculate the average from.
|
|
129
|
-
|
|
130
|
-
:return: None.
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
# Aggregate daily domain hits.
|
|
134
|
-
if host_name not in daily_stats[host_type][feature].keys():
|
|
135
|
-
daily_stats[host_type][feature][host_name] = {}
|
|
136
|
-
# Iterate from first day to the last day.
|
|
137
|
-
for day in range(0, last_day + 1):
|
|
138
|
-
daily_stats[host_type][feature][host_name][day] = 0
|
|
139
|
-
|
|
140
|
-
# If the list of size is empty, add 0 to the average, since we cannot divide by 0.
|
|
141
|
-
if len(list_of_sizes) == 0:
|
|
142
|
-
daily_stats[host_type][feature][host_name][current_day] = 0
|
|
143
|
-
else:
|
|
144
|
-
daily_stats[host_type][feature][host_name][current_day] = sum(list_of_sizes) / len(list_of_sizes)
|
|
14
|
+
STATISTICS_FILE_NAME: str = 'statistics.csv'
|
|
145
15
|
|
|
146
16
|
|
|
147
17
|
def analyze(main_file_path: str):
|
|
@@ -173,9 +43,9 @@ def analyze(main_file_path: str):
|
|
|
173
43
|
'subdomain': {'total_count': {}, 'normal_count': {}, 'error_count': {}}
|
|
174
44
|
}
|
|
175
45
|
daily_stats: dict = {
|
|
176
|
-
'domain': create_empty_features_dict(),
|
|
177
|
-
'subdomain': create_empty_features_dict(),
|
|
178
|
-
'url_no_parameters': create_empty_features_dict()
|
|
46
|
+
'domain': analyzer_helper.create_empty_features_dict(),
|
|
47
|
+
'subdomain': analyzer_helper.create_empty_features_dict(),
|
|
48
|
+
'url_no_parameters': analyzer_helper.create_empty_features_dict()
|
|
179
49
|
}
|
|
180
50
|
|
|
181
51
|
# Start the main loop.
|
|
@@ -196,7 +66,7 @@ def analyze(main_file_path: str):
|
|
|
196
66
|
|
|
197
67
|
# Find the last day number. If 'break_after_lines' is specified, the loop will stop after the specified line.
|
|
198
68
|
if not last_day_number:
|
|
199
|
-
last_day_number = get_the_last_day_number(statistics_content, break_after_lines)
|
|
69
|
+
last_day_number = analyzer_helper.get_the_last_day_number(statistics_content, break_after_lines)
|
|
200
70
|
|
|
201
71
|
if break_after_lines:
|
|
202
72
|
if line_index == break_after_lines:
|
|
@@ -296,87 +166,87 @@ def analyze(main_file_path: str):
|
|
|
296
166
|
day_number = datetimes.get_difference_between_dates_in_days(start_time, request_time)
|
|
297
167
|
|
|
298
168
|
# Add 1 to the total count of the current day.
|
|
299
|
-
add_to_count_to_daily_stats(
|
|
169
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
300
170
|
daily_stats, day_number, last_day_number, 'domain', 'total_count', main_domain)
|
|
301
|
-
add_to_count_to_daily_stats(
|
|
171
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
302
172
|
daily_stats, day_number, last_day_number, 'subdomain', 'total_count', subdomain)
|
|
303
|
-
add_to_count_to_daily_stats(
|
|
173
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
304
174
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'total_count', url_no_parameters)
|
|
305
175
|
|
|
306
176
|
# Handle line if it has error.
|
|
307
177
|
if line['error'] != '':
|
|
308
|
-
add_to_count_to_daily_stats(
|
|
178
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
309
179
|
daily_stats, day_number, last_day_number, 'domain', 'error_count', main_domain)
|
|
310
|
-
add_to_count_to_daily_stats(
|
|
180
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
311
181
|
daily_stats, day_number, last_day_number, 'subdomain', 'error_count', subdomain)
|
|
312
|
-
add_to_count_to_daily_stats(
|
|
182
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
313
183
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'error_count', url_no_parameters)
|
|
314
184
|
else:
|
|
315
|
-
add_to_count_to_daily_stats(
|
|
185
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
316
186
|
daily_stats, day_number, last_day_number, 'domain', 'normal_count', main_domain)
|
|
317
|
-
add_to_count_to_daily_stats(
|
|
187
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
318
188
|
daily_stats, day_number, last_day_number, 'subdomain', 'normal_count', subdomain)
|
|
319
|
-
add_to_count_to_daily_stats(
|
|
189
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
320
190
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'normal_count', url_no_parameters)
|
|
321
191
|
|
|
322
192
|
if request_size == 0:
|
|
323
|
-
add_to_count_to_daily_stats(
|
|
193
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
324
194
|
daily_stats, day_number, last_day_number, 'domain', 'request_0_byte_count',
|
|
325
195
|
main_domain)
|
|
326
|
-
add_to_count_to_daily_stats(
|
|
196
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
327
197
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_0_byte_count',
|
|
328
198
|
subdomain)
|
|
329
|
-
add_to_count_to_daily_stats(
|
|
199
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
330
200
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_0_byte_count',
|
|
331
201
|
url_no_parameters)
|
|
332
202
|
|
|
333
203
|
if response_size == 0:
|
|
334
|
-
add_to_count_to_daily_stats(
|
|
204
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
335
205
|
daily_stats, day_number, last_day_number, 'domain', 'response_0_byte_count',
|
|
336
206
|
main_domain)
|
|
337
|
-
add_to_count_to_daily_stats(
|
|
207
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
338
208
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_0_byte_count',
|
|
339
209
|
subdomain)
|
|
340
|
-
add_to_count_to_daily_stats(
|
|
210
|
+
analyzer_helper.add_to_count_to_daily_stats(
|
|
341
211
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_0_byte_count',
|
|
342
212
|
url_no_parameters)
|
|
343
213
|
|
|
344
214
|
if request_size is not None and response_size is not None:
|
|
345
|
-
add_to_list_to_daily_stats(
|
|
215
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
346
216
|
daily_stats, day_number, last_day_number, 'domain', 'request_sizes_list', main_domain, request_size)
|
|
347
|
-
add_to_list_to_daily_stats(
|
|
217
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
348
218
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_list', subdomain, request_size)
|
|
349
|
-
add_to_list_to_daily_stats(
|
|
219
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
350
220
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_list', url_no_parameters,
|
|
351
221
|
request_size)
|
|
352
222
|
|
|
353
|
-
add_to_list_to_daily_stats(
|
|
223
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
354
224
|
daily_stats, day_number, last_day_number, 'domain', 'response_sizes_list', main_domain, response_size)
|
|
355
|
-
add_to_list_to_daily_stats(
|
|
225
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
356
226
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_list', subdomain, response_size)
|
|
357
|
-
add_to_list_to_daily_stats(
|
|
227
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
358
228
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_list', url_no_parameters,
|
|
359
229
|
response_size)
|
|
360
230
|
|
|
361
231
|
if request_size != 0 and request_size is not None:
|
|
362
|
-
add_to_list_to_daily_stats(
|
|
232
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
363
233
|
daily_stats, day_number, last_day_number, 'domain', 'request_sizes_no_0_bytes_list',
|
|
364
234
|
main_domain, request_size)
|
|
365
|
-
add_to_list_to_daily_stats(
|
|
235
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
366
236
|
daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_no_0_bytes_list',
|
|
367
237
|
subdomain, request_size)
|
|
368
|
-
add_to_list_to_daily_stats(
|
|
238
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
369
239
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_no_0_bytes_list',
|
|
370
240
|
url_no_parameters, request_size)
|
|
371
241
|
|
|
372
242
|
if response_size != 0 and response_size is not None:
|
|
373
|
-
add_to_list_to_daily_stats(
|
|
243
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
374
244
|
daily_stats, day_number, last_day_number, 'domain', 'response_sizes_no_0_bytes_list',
|
|
375
245
|
main_domain, response_size)
|
|
376
|
-
add_to_list_to_daily_stats(
|
|
246
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
377
247
|
daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_no_0_bytes_list',
|
|
378
248
|
subdomain, response_size)
|
|
379
|
-
add_to_list_to_daily_stats(
|
|
249
|
+
analyzer_helper.add_to_list_to_daily_stats(
|
|
380
250
|
daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_no_0_bytes_list',
|
|
381
251
|
url_no_parameters, response_size)
|
|
382
252
|
|
|
@@ -398,7 +268,7 @@ def analyze(main_file_path: str):
|
|
|
398
268
|
|
|
399
269
|
for host_name, days in hosts.items():
|
|
400
270
|
for day, sizes in days.items():
|
|
401
|
-
add_to_average_to_daily_stats(
|
|
271
|
+
analyzer_helper.add_to_average_to_daily_stats(
|
|
402
272
|
daily_stats, day, last_day_number, host_type, feature_name, host_name, sizes)
|
|
403
273
|
|
|
404
274
|
# Sorting overall stats.
|
|
@@ -474,327 +344,40 @@ def analyze(main_file_path: str):
|
|
|
474
344
|
# ======================================================================================================================
|
|
475
345
|
|
|
476
346
|
|
|
477
|
-
def
|
|
478
|
-
|
|
479
|
-
moving_average_window_days,
|
|
480
|
-
top_bottom_deviation_percentage: float,
|
|
481
|
-
print_kwargs: dict = None
|
|
482
|
-
):
|
|
483
|
-
"""
|
|
484
|
-
This function calculates the moving average of the daily statistics.
|
|
485
|
-
|
|
486
|
-
:param file_path: string, the path to the 'statistics.csv' file.
|
|
487
|
-
:param moving_average_window_days: integer, the window size for the moving average.
|
|
488
|
-
:param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
|
|
489
|
-
bottom.
|
|
490
|
-
:param print_kwargs: dict, the print_api arguments.
|
|
491
|
-
"""
|
|
492
|
-
|
|
493
|
-
date_pattern: str = '%Y_%m_%d'
|
|
494
|
-
|
|
495
|
-
# Get all the file paths and their midnight rotations.
|
|
496
|
-
logs_paths: list = reading.get_logs_paths(
|
|
497
|
-
log_file_path=file_path,
|
|
498
|
-
date_pattern=date_pattern
|
|
499
|
-
)
|
|
500
|
-
|
|
501
|
-
statistics_content: dict = {}
|
|
502
|
-
# Read each file to its day.
|
|
503
|
-
for log_path_dict in logs_paths:
|
|
504
|
-
date_string = log_path_dict['date_string']
|
|
505
|
-
statistics_content[date_string] = {}
|
|
506
|
-
|
|
507
|
-
statistics_content[date_string]['file'] = log_path_dict
|
|
508
|
-
|
|
509
|
-
log_file_content, log_file_header = (
|
|
510
|
-
csvs.read_csv_to_list_of_dicts_by_header(log_path_dict['file_path'], **(print_kwargs or {})))
|
|
511
|
-
statistics_content[date_string]['content'] = log_file_content
|
|
512
|
-
statistics_content[date_string]['header'] = log_file_header
|
|
513
|
-
|
|
514
|
-
statistics_content[date_string]['content_no_errors'] = get_content_without_errors(log_file_content)
|
|
515
|
-
|
|
516
|
-
# Get the data dictionary from the statistics content.
|
|
517
|
-
statistics_content[date_string]['statistics_daily'] = compute_statistics_from_content(
|
|
518
|
-
statistics_content[date_string]['content_no_errors']
|
|
519
|
-
)
|
|
520
|
-
|
|
521
|
-
moving_average_dict: dict = compute_moving_averages_from_average_statistics(
|
|
522
|
-
statistics_content,
|
|
523
|
-
moving_average_window_days
|
|
524
|
-
)
|
|
525
|
-
|
|
526
|
-
# Add the moving average to the statistics content.
|
|
527
|
-
for day, day_dict in statistics_content.items():
|
|
528
|
-
try:
|
|
529
|
-
day_dict['moving_average'] = moving_average_dict[day]
|
|
530
|
-
except KeyError:
|
|
531
|
-
day_dict['moving_average'] = {}
|
|
532
|
-
|
|
533
|
-
# Find deviation from the moving average to the bottom or top by specified percentage.
|
|
534
|
-
deviation_list: list = find_deviation_from_moving_average(
|
|
535
|
-
statistics_content, top_bottom_deviation_percentage)
|
|
536
|
-
|
|
537
|
-
return deviation_list
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
def get_content_without_errors(content: list) -> list:
|
|
541
|
-
"""
|
|
542
|
-
This function gets the 'statistics.csv' file content without errors from the 'content' list.
|
|
543
|
-
|
|
544
|
-
:param content: list, the content list.
|
|
545
|
-
:return: list, the content without errors.
|
|
546
|
-
"""
|
|
547
|
-
|
|
548
|
-
traffic_statistics_without_errors: list = []
|
|
549
|
-
for line in content:
|
|
550
|
-
# Skip empty lines, headers and errors.
|
|
551
|
-
if line['host'] == 'host' or line['command'] == '':
|
|
552
|
-
continue
|
|
553
|
-
|
|
554
|
-
traffic_statistics_without_errors.append(line)
|
|
555
|
-
|
|
556
|
-
return traffic_statistics_without_errors
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
def get_data_dict_from_statistics_content(content: list) -> dict:
|
|
560
|
-
"""
|
|
561
|
-
This function gets the data dictionary from the 'statistics.csv' file content.
|
|
562
|
-
|
|
563
|
-
:param content: list, the content list.
|
|
564
|
-
:return: dict, the data dictionary.
|
|
565
|
-
"""
|
|
566
|
-
|
|
567
|
-
hosts_requests_responses: dict = {}
|
|
568
|
-
for line in content:
|
|
569
|
-
# If subdomain is not in the dictionary, add it.
|
|
570
|
-
if line['host'] not in hosts_requests_responses:
|
|
571
|
-
hosts_requests_responses[line['host']] = {
|
|
572
|
-
'request_sizes': [],
|
|
573
|
-
'response_sizes': []
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
# Append the sizes.
|
|
577
|
-
try:
|
|
578
|
-
hosts_requests_responses[line['host']]['request_sizes'].append(int(line['request_size_bytes']))
|
|
579
|
-
hosts_requests_responses[line['host']]['response_sizes'].append(
|
|
580
|
-
int(line['response_size_bytes']))
|
|
581
|
-
except ValueError:
|
|
582
|
-
print_api(line, color='yellow')
|
|
583
|
-
raise
|
|
584
|
-
|
|
585
|
-
return hosts_requests_responses
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
def compute_statistics_from_data_dict(data_dict: dict):
|
|
589
|
-
"""
|
|
590
|
-
This function computes the statistics from the data dictionary.
|
|
591
|
-
|
|
592
|
-
:param data_dict: dict, the data dictionary.
|
|
593
|
-
:return: dict, the statistics dictionary.
|
|
594
|
-
"""
|
|
595
|
-
|
|
596
|
-
for host, host_dict in data_dict.items():
|
|
597
|
-
count = len(host_dict['request_sizes'])
|
|
598
|
-
avg_request_size = statistics.mean(host_dict['request_sizes']) if count > 0 else 0
|
|
599
|
-
median_request_size = statistics.median(host_dict['request_sizes']) if count > 0 else 0
|
|
600
|
-
avg_response_size = statistics.mean(host_dict['response_sizes']) if count > 0 else 0
|
|
601
|
-
median_response_size = statistics.median(host_dict['response_sizes']) if count > 0 else 0
|
|
602
|
-
|
|
603
|
-
data_dict[host]['count'] = count
|
|
604
|
-
data_dict[host]['avg_request_size'] = avg_request_size
|
|
605
|
-
data_dict[host]['median_request_size'] = median_request_size
|
|
606
|
-
data_dict[host]['avg_response_size'] = avg_response_size
|
|
607
|
-
data_dict[host]['median_response_size'] = median_response_size
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
def compute_statistics_from_content(content: list):
|
|
611
|
-
"""
|
|
612
|
-
This function computes the statistics from the 'statistics.csv' file content.
|
|
613
|
-
|
|
614
|
-
:param content: list, the content list.
|
|
615
|
-
:return: dict, the statistics dictionary.
|
|
616
|
-
"""
|
|
617
|
-
|
|
618
|
-
hosts_requests_responses: dict = get_data_dict_from_statistics_content(content)
|
|
619
|
-
compute_statistics_from_data_dict(hosts_requests_responses)
|
|
620
|
-
|
|
621
|
-
return hosts_requests_responses
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
def compute_moving_averages_from_average_statistics(
|
|
625
|
-
average_statistics_dict: dict,
|
|
626
|
-
moving_average_window_days: int
|
|
627
|
-
):
|
|
628
|
-
"""
|
|
629
|
-
This function computes the moving averages from the average statistics dictionary.
|
|
630
|
-
|
|
631
|
-
:param average_statistics_dict: dict, the average statistics dictionary.
|
|
632
|
-
:param moving_average_window_days: integer, the window size for the moving average.
|
|
633
|
-
:return: dict, the moving averages dictionary.
|
|
634
|
-
"""
|
|
635
|
-
|
|
636
|
-
moving_average: dict = {}
|
|
637
|
-
for day_index, (day, day_dict) in enumerate(average_statistics_dict.items()):
|
|
638
|
-
current_day = day_index + 1
|
|
639
|
-
if current_day < moving_average_window_days:
|
|
640
|
-
continue
|
|
641
|
-
|
|
642
|
-
# Create list of the previous 'moving_average_window_days' days.
|
|
643
|
-
previous_days_content_list = (
|
|
644
|
-
list(average_statistics_dict.values()))[current_day-moving_average_window_days:current_day]
|
|
645
|
-
|
|
646
|
-
# Compute the moving averages.
|
|
647
|
-
moving_average[day] = compute_average_for_current_day_from_past_x_days(previous_days_content_list)
|
|
648
|
-
|
|
649
|
-
return moving_average
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
def compute_average_for_current_day_from_past_x_days(previous_days_content_list: list) -> dict:
|
|
653
|
-
"""
|
|
654
|
-
This function computes the average for the current day from the past x days.
|
|
655
|
-
|
|
656
|
-
:param previous_days_content_list: list, the list of the previous days content.
|
|
657
|
-
:return: dict, the average dictionary.
|
|
658
|
-
"""
|
|
659
|
-
|
|
660
|
-
moving_average: dict = {}
|
|
661
|
-
for entry in previous_days_content_list:
|
|
662
|
-
statistics_daily = entry['statistics_daily']
|
|
663
|
-
for host, host_dict in statistics_daily.items():
|
|
664
|
-
if host not in moving_average:
|
|
665
|
-
moving_average[host] = {
|
|
666
|
-
'counts': [],
|
|
667
|
-
'avg_request_sizes': [],
|
|
668
|
-
'avg_response_sizes': [],
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
moving_average[host]['counts'].append(int(host_dict['count']))
|
|
672
|
-
moving_average[host]['avg_request_sizes'].append(float(host_dict['avg_request_size']))
|
|
673
|
-
moving_average[host]['avg_response_sizes'].append(float(host_dict['avg_response_size']))
|
|
674
|
-
|
|
675
|
-
# Compute the moving average.
|
|
676
|
-
moving_average_results: dict = {}
|
|
677
|
-
for host, host_dict in moving_average.items():
|
|
678
|
-
ma_count = statistics.mean(host_dict['counts'])
|
|
679
|
-
ma_request_size = statistics.mean(host_dict['avg_request_sizes'])
|
|
680
|
-
ma_response_size = statistics.mean(host_dict['avg_response_sizes'])
|
|
681
|
-
|
|
682
|
-
moving_average_results[host] = {
|
|
683
|
-
'ma_count': ma_count,
|
|
684
|
-
'ma_request_size': ma_request_size,
|
|
685
|
-
'ma_response_size': ma_response_size,
|
|
686
|
-
'counts': host_dict['counts'],
|
|
687
|
-
'avg_request_sizes': host_dict['avg_request_sizes'],
|
|
688
|
-
'avg_response_sizes': host_dict['avg_response_sizes']
|
|
689
|
-
}
|
|
690
|
-
|
|
691
|
-
return moving_average_results
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
def find_deviation_from_moving_average(
|
|
695
|
-
statistics_content: dict,
|
|
696
|
-
top_bottom_deviation_percentage: float
|
|
697
|
-
) -> list:
|
|
698
|
-
"""
|
|
699
|
-
This function finds the deviation from the moving average to the bottom or top by specified percentage.
|
|
700
|
-
|
|
701
|
-
:param statistics_content: dict, the statistics content dictionary.
|
|
702
|
-
:param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
|
|
703
|
-
bottom.
|
|
704
|
-
:return: list, the deviation list.
|
|
705
|
-
"""
|
|
706
|
-
|
|
707
|
-
def _check_deviation(
|
|
708
|
-
check_type: Literal['count', 'avg_request_size', 'avg_response_size'],
|
|
709
|
-
ma_check_type: Literal['ma_count', 'ma_request_size', 'ma_response_size'],
|
|
710
|
-
day_statistics_content_dict: dict,
|
|
711
|
-
moving_averages_dict: dict
|
|
712
|
-
):
|
|
713
|
-
"""
|
|
714
|
-
This function checks the deviation for the host.
|
|
715
|
-
"""
|
|
716
|
-
|
|
717
|
-
nonlocal message
|
|
718
|
-
|
|
719
|
-
host_moving_average_by_type = moving_averages_dict[host][ma_check_type]
|
|
720
|
-
check_type_moving_by_percent = (
|
|
721
|
-
host_moving_average_by_type * top_bottom_deviation_percentage)
|
|
722
|
-
check_type_moving_above = host_moving_average_by_type + check_type_moving_by_percent
|
|
723
|
-
check_type_moving_below = host_moving_average_by_type - check_type_moving_by_percent
|
|
724
|
-
|
|
725
|
-
deviation_type = None
|
|
726
|
-
if day_statistics_content_dict[check_type] > check_type_moving_above:
|
|
727
|
-
deviation_type = 'above'
|
|
728
|
-
elif day_statistics_content_dict[check_type] < check_type_moving_below:
|
|
729
|
-
deviation_type = 'below'
|
|
730
|
-
|
|
731
|
-
if deviation_type:
|
|
732
|
-
message = f'[{check_type}] is [{deviation_type}] the moving average.'
|
|
733
|
-
deviation_list.append({
|
|
734
|
-
'day': day,
|
|
735
|
-
'host': host,
|
|
736
|
-
'message': message,
|
|
737
|
-
'value': day_statistics_content_dict[check_type],
|
|
738
|
-
'ma_value': host_moving_average_by_type,
|
|
739
|
-
'check_type': check_type,
|
|
740
|
-
'percentage': top_bottom_deviation_percentage,
|
|
741
|
-
'ma_value_checked': check_type_moving_above,
|
|
742
|
-
'deviation_type': deviation_type,
|
|
743
|
-
'data': day_statistics_content_dict,
|
|
744
|
-
'ma_data': moving_averages_dict[host]
|
|
745
|
-
})
|
|
746
|
-
|
|
747
|
-
deviation_list: list = []
|
|
748
|
-
for day_index, (day, day_dict) in enumerate(statistics_content.items()):
|
|
749
|
-
# If it's the first day, there is no previous day moving average.
|
|
750
|
-
if day_index == 0:
|
|
751
|
-
previous_day_moving_average_dict = {}
|
|
752
|
-
else:
|
|
753
|
-
previous_day_moving_average_dict = list(statistics_content.values())[day_index-1].get('moving_average', {})
|
|
754
|
-
|
|
755
|
-
# If there is no moving average for previous day continue to the next day.
|
|
756
|
-
if not previous_day_moving_average_dict:
|
|
757
|
-
continue
|
|
758
|
-
|
|
759
|
-
for host, host_dict in day_dict['statistics_daily'].items():
|
|
760
|
-
# If the host is not in the moving averages, then this is clear deviation.
|
|
761
|
-
# It means that in the current day, there were no requests for this host.
|
|
762
|
-
if host not in previous_day_moving_average_dict:
|
|
763
|
-
message = f'Host not in the moving averages: {host}'
|
|
764
|
-
deviation_list.append({
|
|
765
|
-
'day': day,
|
|
766
|
-
'host': host,
|
|
767
|
-
'data': host_dict,
|
|
768
|
-
'message': message,
|
|
769
|
-
'type': 'clear'
|
|
770
|
-
})
|
|
771
|
-
continue
|
|
772
|
-
|
|
773
|
-
_check_deviation(
|
|
774
|
-
'count', 'ma_count', host_dict, previous_day_moving_average_dict)
|
|
775
|
-
_check_deviation(
|
|
776
|
-
'avg_request_size', 'ma_request_size', host_dict, previous_day_moving_average_dict)
|
|
777
|
-
_check_deviation(
|
|
778
|
-
'avg_response_size', 'ma_response_size', host_dict, previous_day_moving_average_dict)
|
|
779
|
-
|
|
780
|
-
return deviation_list
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
def moving_average_calculator_main(
|
|
784
|
-
statistics_file_path: str,
|
|
785
|
-
output_directory: str,
|
|
347
|
+
def deviation_calculator_by_moving_average_main(
|
|
348
|
+
statistics_file_directory: str,
|
|
786
349
|
moving_average_window_days: int,
|
|
787
|
-
top_bottom_deviation_percentage: float
|
|
788
|
-
|
|
350
|
+
top_bottom_deviation_percentage: float,
|
|
351
|
+
get_deviation_for_last_day_only: bool = False,
|
|
352
|
+
summary: bool = False,
|
|
353
|
+
output_json_file_path: str = None
|
|
354
|
+
) -> Union[list, None]:
|
|
789
355
|
"""
|
|
790
356
|
This function is the main function for the moving average calculator.
|
|
791
357
|
|
|
792
|
-
:param
|
|
793
|
-
|
|
358
|
+
:param statistics_file_directory: string, the directory where 'statistics.csv' file resides.
|
|
359
|
+
Also, all the rotated files like: statistics_2021-01-01.csv, statistics_2021-01-02.csv, etc.
|
|
360
|
+
These will be analyzed in the order of the date in the file name.
|
|
794
361
|
:param moving_average_window_days: integer, the moving average window days.
|
|
795
362
|
:param top_bottom_deviation_percentage: float, the top bottom deviation percentage. Example: 0.1 for 10%.
|
|
796
|
-
:
|
|
363
|
+
:param get_deviation_for_last_day_only: bool, if True, only the last day will be analyzed.
|
|
364
|
+
Example: With 'moving_average_window_days=5', the last 6 days will be analyzed.
|
|
365
|
+
5 days for moving average and the last day for deviation.
|
|
366
|
+
File names example:
|
|
367
|
+
statistics_2021-01-01.csv
|
|
368
|
+
statistics_2021-01-02.csv
|
|
369
|
+
statistics_2021-01-03.csv
|
|
370
|
+
statistics_2021-01-04.csv
|
|
371
|
+
statistics_2021-01-05.csv
|
|
372
|
+
statistics_2021-01-06.csv
|
|
373
|
+
Files 01 to 05 will be used for moving average and the file 06 for deviation.
|
|
374
|
+
Meaning the average calculated for 2021-01-06 will be compared to the moving average of the values from 2021-01-01
|
|
375
|
+
to 2021-01-05.
|
|
376
|
+
:param summary: bool, if True, only the summary will be generated, without all the numbers that were used
|
|
377
|
+
to calculate the averages and the moving average data.
|
|
378
|
+
:param output_json_file_path: string, if None, no json file will be written.
|
|
797
379
|
-----------------------------
|
|
380
|
+
:return: the deviation list of dicts.
|
|
798
381
|
|
|
799
382
|
Example:
|
|
800
383
|
import sys
|
|
@@ -804,9 +387,9 @@ def moving_average_calculator_main(
|
|
|
804
387
|
def main():
|
|
805
388
|
return statistic_analyzer.deviation_calculator_by_moving_average_main(
|
|
806
389
|
statistics_file_directory='C:\\statistics',
|
|
807
|
-
output_directory='output',
|
|
808
390
|
moving_average_window_days=7,
|
|
809
|
-
top_bottom_deviation_percentage=0.1
|
|
391
|
+
top_bottom_deviation_percentage=0.1,
|
|
392
|
+
output_json_file_path='C:\\output\\deviation_list.json'
|
|
810
393
|
)
|
|
811
394
|
|
|
812
395
|
|
|
@@ -814,6 +397,8 @@ def moving_average_calculator_main(
|
|
|
814
397
|
sys.exit(main())
|
|
815
398
|
"""
|
|
816
399
|
|
|
400
|
+
statistics_file_path: str = f'{statistics_file_directory}{os.sep}{STATISTICS_FILE_NAME}'
|
|
401
|
+
|
|
817
402
|
def convert_data_value_to_string(value_key: str, list_index: int) -> None:
|
|
818
403
|
deviation_list[list_index]['data'][value_key] = json.dumps(deviation_list[list_index]['data'][value_key])
|
|
819
404
|
|
|
@@ -821,20 +406,37 @@ def moving_average_calculator_main(
|
|
|
821
406
|
if value_key in deviation_list[list_index]:
|
|
822
407
|
deviation_list[list_index][value_key] = json.dumps(deviation_list[list_index][value_key])
|
|
823
408
|
|
|
824
|
-
deviation_list = calculate_moving_average(
|
|
409
|
+
deviation_list = moving_average_helper.calculate_moving_average(
|
|
825
410
|
statistics_file_path,
|
|
826
411
|
moving_average_window_days,
|
|
827
|
-
top_bottom_deviation_percentage
|
|
412
|
+
top_bottom_deviation_percentage,
|
|
413
|
+
get_deviation_for_last_day_only
|
|
828
414
|
)
|
|
829
415
|
|
|
830
416
|
if deviation_list:
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
417
|
+
if output_json_file_path:
|
|
418
|
+
for deviation_list_index, deviation in enumerate(deviation_list):
|
|
419
|
+
convert_data_value_to_string('request_sizes', deviation_list_index)
|
|
420
|
+
convert_data_value_to_string('response_sizes', deviation_list_index)
|
|
421
|
+
convert_value_to_string('ma_data', deviation_list_index)
|
|
422
|
+
|
|
423
|
+
print_api(f'Deviation Found, saving to file: {output_json_file_path}', color='blue')
|
|
424
|
+
jsons.write_json_file(deviation_list, output_json_file_path, use_default_indent=True)
|
|
425
|
+
|
|
426
|
+
if summary:
|
|
427
|
+
summary_deviation_list: list = []
|
|
428
|
+
for deviation in deviation_list:
|
|
429
|
+
summary_deviation_list.append({
|
|
430
|
+
'day': deviation['day'],
|
|
431
|
+
'host': deviation['host'],
|
|
432
|
+
'message': deviation['message'],
|
|
433
|
+
'value': deviation['value'],
|
|
434
|
+
'ma_value': deviation['ma_value'],
|
|
435
|
+
'total_entries_averaged': deviation['data']['count']
|
|
436
|
+
})
|
|
437
|
+
|
|
438
|
+
deviation_list = summary_deviation_list
|
|
835
439
|
|
|
836
|
-
|
|
837
|
-
print_api(f'Deviation Found, saving to file: {file_path}', color='blue')
|
|
838
|
-
jsons.write_json_file(deviation_list, file_path, use_default_indent=True)
|
|
440
|
+
return deviation_list
|
|
839
441
|
|
|
840
|
-
return
|
|
442
|
+
return None
|