atomicshop 2.14.12__py3-none-any.whl → 2.14.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of atomicshop might be problematic.

@@ -1,146 +1,17 @@
+import os
 import datetime
-import statistics
 import json
-from typing import Literal, Union
+from typing import Union
 
+from .statistic_analyzer_helper import analyzer_helper, moving_average_helper
 from .. import filesystem, domains, datetimes, urls
 from ..basics import dicts
-from ..file_io import tomls, xlsxs, csvs, jsons
-from ..wrappers.loggingw import reading, consts
+from ..file_io import tomls, xlsxs, jsons
+from ..wrappers.loggingw import reading
 from ..print_api import print_api
 
 
-def get_the_last_day_number(statistics_content: list, stop_after_lines: int = None) -> int:
-    """
-    This function gets the last day number from the statistics content.
-
-    :param statistics_content: list, of lines in the statistics content.
-    :param stop_after_lines: integer, if specified, the function will stop after the specified number of lines.
-    :return: integer, the last day number.
-    """
-
-    last_day_number = None
-    start_time_temp = None
-    for line_index, line in enumerate(statistics_content):
-        try:
-            request_time = datetime.datetime.strptime(line['request_time_sent'], '%Y-%m-%d %H:%M:%S.%f')
-        except ValueError:
-            continue
-
-        if not start_time_temp:
-            start_time_temp = request_time
-
-        if stop_after_lines:
-            if line_index == stop_after_lines:
-                break
-
-        last_day_number = datetimes.get_difference_between_dates_in_days(start_time_temp, request_time)
-    return last_day_number
-
-
-def create_empty_features_dict() -> dict:
-    """
-    This function creates an empty dictionary for the daily stats. This should be initiated for each 'host_type' of:
-    'domain', 'subdomain', 'url_no_parameters'.
-    :return: dict
-    """
-
-    return {
-        'total_count': {}, 'normal_count': {}, 'error_count': {},
-        'request_0_byte_count': {}, 'response_0_byte_count': {},
-        'request_sizes_list': {}, 'response_sizes_list': {},
-        'request_sizes_no_0_bytes_list': {}, 'response_sizes_no_0_bytes_list': {},
-        'average_request_size': {}, 'average_response_size': {},
-        'average_request_size_no_0_bytes': {}, 'average_response_size_no_0_bytes': {}}
-
-
-def add_to_count_to_daily_stats(
-        daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str) -> None:
-    """
-    This function adds 1 to the 'count' feature of the current day in the daily stats.
-
-    :param daily_stats: dict, the daily statistics dict.
-    :param current_day: integer, the current day number.
-    :param last_day: integer, the last day number.
-    :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
-    :param feature: string, the feature to add the count to. Can be: 'total_count', 'normal_count', 'error_count',
-        'request_0_byte_count', 'response_0_byte_count'.
-    :param host_name: string, the name of the host.
-
-    :return: None.
-    """
-
-    # Aggregate daily domain hits.
-    if host_name not in daily_stats[host_type][feature].keys():
-        daily_stats[host_type][feature][host_name] = {}
-        # Iterate from first day to the last day.
-        for day in range(0, last_day + 1):
-            daily_stats[host_type][feature][host_name][day] = 0
-
-    # Add count to current day.
-    daily_stats[host_type][feature][host_name][current_day] += 1
-
-
-def add_to_list_to_daily_stats(
-        daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
-        size: float) -> None:
-    """
-    This function adds the 'size' to the 'feature' list of the current day in the daily stats.
-
-    :param daily_stats: dict, the daily statistics dict.
-    :param current_day: integer, the current day number.
-    :param last_day: integer, the last day number.
-    :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
-    :param feature: string, the feature to add the count to. Can be: 'request_sizes_list', 'response_sizes_list',
-        'request_sizes_no_0_bytes_list', 'response_sizes_no_0_bytes_list'.
-    :param host_name: string, the name of the host.
-    :param size: float, the size in bytes to add to the list.
-
-    :return: None.
-    """
-
-    # Aggregate daily domain hits.
-    if host_name not in daily_stats[host_type][feature].keys():
-        daily_stats[host_type][feature][host_name] = {}
-        # Iterate from first day to the last day.
-        for day in range(0, last_day + 1):
-            daily_stats[host_type][feature][host_name][day] = []
-
-    # Add count to current day.
-    daily_stats[host_type][feature][host_name][current_day].append(size)
-
-
-def add_to_average_to_daily_stats(
-        daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
-        list_of_sizes: list) -> None:
-    """
-    This function adds the average size in bytes calculated from the 'list_of_sizes' to the 'feature' of the current
-    day in the daily stats.
-
-    :param daily_stats: dict, the daily statistics dict.
-    :param current_day: integer, the current day number.
-    :param last_day: integer, the last day number.
-    :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
-    :param feature: string, the feature to add the count to. Can be: 'average_request_size', 'average_response_size',
-        'average_request_size_no_0_bytes', 'average_response_size_no_0_bytes'.
-    :param host_name: string, the name of the host.
-    :param list_of_sizes: list, the list of sizes to calculate the average from.
-
-    :return: None.
-    """
-
-    # Aggregate daily domain hits.
-    if host_name not in daily_stats[host_type][feature].keys():
-        daily_stats[host_type][feature][host_name] = {}
-        # Iterate from first day to the last day.
-        for day in range(0, last_day + 1):
-            daily_stats[host_type][feature][host_name][day] = 0
-
-    # If the list of size is empty, add 0 to the average, since we cannot divide by 0.
-    if len(list_of_sizes) == 0:
-        daily_stats[host_type][feature][host_name][current_day] = 0
-    else:
-        daily_stats[host_type][feature][host_name][current_day] = sum(list_of_sizes) / len(list_of_sizes)
+STATISTICS_FILE_NAME: str = 'statistics.csv'
 
 
 def analyze(main_file_path: str):
@@ -172,9 +43,9 @@ def analyze(main_file_path: str):
         'subdomain': {'total_count': {}, 'normal_count': {}, 'error_count': {}}
     }
     daily_stats: dict = {
-        'domain': create_empty_features_dict(),
-        'subdomain': create_empty_features_dict(),
-        'url_no_parameters': create_empty_features_dict()
+        'domain': analyzer_helper.create_empty_features_dict(),
+        'subdomain': analyzer_helper.create_empty_features_dict(),
+        'url_no_parameters': analyzer_helper.create_empty_features_dict()
     }
 
     # Start the main loop.
@@ -195,7 +66,7 @@ def analyze(main_file_path: str):
 
         # Find the last day number. If 'break_after_lines' is specified, the loop will stop after the specified line.
         if not last_day_number:
-            last_day_number = get_the_last_day_number(statistics_content, break_after_lines)
+            last_day_number = analyzer_helper.get_the_last_day_number(statistics_content, break_after_lines)
 
         if break_after_lines:
             if line_index == break_after_lines:
@@ -295,87 +166,87 @@ def analyze(main_file_path: str):
         day_number = datetimes.get_difference_between_dates_in_days(start_time, request_time)
 
         # Add 1 to the total count of the current day.
-        add_to_count_to_daily_stats(
+        analyzer_helper.add_to_count_to_daily_stats(
             daily_stats, day_number, last_day_number, 'domain', 'total_count', main_domain)
-        add_to_count_to_daily_stats(
+        analyzer_helper.add_to_count_to_daily_stats(
             daily_stats, day_number, last_day_number, 'subdomain', 'total_count', subdomain)
-        add_to_count_to_daily_stats(
+        analyzer_helper.add_to_count_to_daily_stats(
             daily_stats, day_number, last_day_number, 'url_no_parameters', 'total_count', url_no_parameters)
 
         # Handle line if it has error.
         if line['error'] != '':
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'error_count', main_domain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'error_count', subdomain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'error_count', url_no_parameters)
         else:
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'normal_count', main_domain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'normal_count', subdomain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'normal_count', url_no_parameters)
 
         if request_size == 0:
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'request_0_byte_count',
                 main_domain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'request_0_byte_count',
                 subdomain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_0_byte_count',
                 url_no_parameters)
 
         if response_size == 0:
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'response_0_byte_count',
                 main_domain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'response_0_byte_count',
                 subdomain)
-            add_to_count_to_daily_stats(
+            analyzer_helper.add_to_count_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_0_byte_count',
                 url_no_parameters)
 
         if request_size is not None and response_size is not None:
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'request_sizes_list', main_domain, request_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_list', subdomain, request_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_list', url_no_parameters,
                 request_size)
 
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'response_sizes_list', main_domain, response_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_list', subdomain, response_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_list', url_no_parameters,
                 response_size)
 
         if request_size != 0 and request_size is not None:
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'request_sizes_no_0_bytes_list',
                 main_domain, request_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_no_0_bytes_list',
                 subdomain, request_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_no_0_bytes_list',
                 url_no_parameters, request_size)
 
         if response_size != 0 and response_size is not None:
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'domain', 'response_sizes_no_0_bytes_list',
                 main_domain, response_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_no_0_bytes_list',
                 subdomain, response_size)
-            add_to_list_to_daily_stats(
+            analyzer_helper.add_to_list_to_daily_stats(
                 daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_no_0_bytes_list',
                 url_no_parameters, response_size)
 
@@ -397,7 +268,7 @@ def analyze(main_file_path: str):
 
             for host_name, days in hosts.items():
                 for day, sizes in days.items():
-                    add_to_average_to_daily_stats(
+                    analyzer_helper.add_to_average_to_daily_stats(
                         daily_stats, day, last_day_number, host_type, feature_name, host_name, sizes)
 
     # Sorting overall stats.
@@ -473,324 +344,37 @@ def analyze(main_file_path: str):
 # ======================================================================================================================
 
 
-def calculate_moving_average(
-        file_path: str,
-        moving_average_window_days,
-        top_bottom_deviation_percentage: float,
-        print_kwargs: dict = None
-) -> list:
-    """
-    This function calculates the moving average of the daily statistics.
-
-    :param file_path: string, the path to the 'statistics.csv' file.
-    :param moving_average_window_days: integer, the window size for the moving average.
-    :param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
-        bottom.
-    :param print_kwargs: dict, the print_api arguments.
-    """
-
-    date_pattern: str = consts.DEFAULT_ROTATING_SUFFIXES_FROM_WHEN['midnight']
-
-    # Get all the file paths and their midnight rotations.
-    logs_paths: list = reading.get_logs_paths(
-        log_file_path=file_path,
-        date_pattern=date_pattern
-    )
-
-    statistics_content: dict = {}
-    # Read each file to its day.
-    for log_path_dict in logs_paths:
-        date_string = log_path_dict['date_string']
-        statistics_content[date_string] = {}
-
-        statistics_content[date_string]['file'] = log_path_dict
-
-        log_file_content, log_file_header = (
-            csvs.read_csv_to_list_of_dicts_by_header(log_path_dict['file_path'], **(print_kwargs or {})))
-        statistics_content[date_string]['content'] = log_file_content
-        statistics_content[date_string]['header'] = log_file_header
-
-        statistics_content[date_string]['content_no_errors'] = get_content_without_errors(log_file_content)
-
-        # Get the data dictionary from the statistics content.
-        statistics_content[date_string]['statistics_daily'] = compute_statistics_from_content(
-            statistics_content[date_string]['content_no_errors']
-        )
-
-    moving_average_dict: dict = compute_moving_averages_from_average_statistics(
-        statistics_content,
-        moving_average_window_days
-    )
-
-    # Add the moving average to the statistics content.
-    for day, day_dict in statistics_content.items():
-        try:
-            day_dict['moving_average'] = moving_average_dict[day]
-        except KeyError:
-            day_dict['moving_average'] = {}
-
-    # Find deviation from the moving average to the bottom or top by specified percentage.
-    deviation_list: list = find_deviation_from_moving_average(
-        statistics_content, top_bottom_deviation_percentage)
-
-    return deviation_list
-
-
-def get_content_without_errors(content: list) -> list:
-    """
-    This function gets the 'statistics.csv' file content without errors from the 'content' list.
-
-    :param content: list, the content list.
-    :return: list, the content without errors.
-    """
-
-    traffic_statistics_without_errors: list = []
-    for line in content:
-        # Skip empty lines, headers and errors.
-        if line['host'] == 'host' or line['command'] == '':
-            continue
-
-        traffic_statistics_without_errors.append(line)
-
-    return traffic_statistics_without_errors
-
-
-def get_data_dict_from_statistics_content(content: list) -> dict:
-    """
-    This function gets the data dictionary from the 'statistics.csv' file content.
-
-    :param content: list, the content list.
-    :return: dict, the data dictionary.
-    """
-
-    hosts_requests_responses: dict = {}
-    for line in content:
-        # If subdomain is not in the dictionary, add it.
-        if line['host'] not in hosts_requests_responses:
-            hosts_requests_responses[line['host']] = {
-                'request_sizes': [],
-                'response_sizes': []
-            }
-
-        # Append the sizes.
-        try:
-            hosts_requests_responses[line['host']]['request_sizes'].append(int(line['request_size_bytes']))
-            hosts_requests_responses[line['host']]['response_sizes'].append(
-                int(line['response_size_bytes']))
-        except ValueError:
-            print_api(line, color='yellow')
-            raise
-
-    return hosts_requests_responses
-
-
-def compute_statistics_from_data_dict(data_dict: dict):
-    """
-    This function computes the statistics from the data dictionary.
-
-    :param data_dict: dict, the data dictionary.
-    :return: dict, the statistics dictionary.
-    """
-
-    for host, host_dict in data_dict.items():
-        count = len(host_dict['request_sizes'])
-        avg_request_size = statistics.mean(host_dict['request_sizes']) if count > 0 else 0
-        median_request_size = statistics.median(host_dict['request_sizes']) if count > 0 else 0
-        avg_response_size = statistics.mean(host_dict['response_sizes']) if count > 0 else 0
-        median_response_size = statistics.median(host_dict['response_sizes']) if count > 0 else 0
-
-        data_dict[host]['count'] = count
-        data_dict[host]['avg_request_size'] = avg_request_size
-        data_dict[host]['median_request_size'] = median_request_size
-        data_dict[host]['avg_response_size'] = avg_response_size
-        data_dict[host]['median_response_size'] = median_response_size
-
-
-def compute_statistics_from_content(content: list):
-    """
-    This function computes the statistics from the 'statistics.csv' file content.
-
-    :param content: list, the content list.
-    :return: dict, the statistics dictionary.
-    """
-
-    hosts_requests_responses: dict = get_data_dict_from_statistics_content(content)
-    compute_statistics_from_data_dict(hosts_requests_responses)
-
-    return hosts_requests_responses
-
-
-def compute_moving_averages_from_average_statistics(
-        average_statistics_dict: dict,
-        moving_average_window_days: int
-):
-    """
-    This function computes the moving averages from the average statistics dictionary.
-
-    :param average_statistics_dict: dict, the average statistics dictionary.
-    :param moving_average_window_days: integer, the window size for the moving average.
-    :return: dict, the moving averages dictionary.
-    """
-
-    moving_average: dict = {}
-    for day_index, (day, day_dict) in enumerate(average_statistics_dict.items()):
-        current_day = day_index + 1
-        if current_day < moving_average_window_days:
-            continue
-
-        # Create list of the previous 'moving_average_window_days' days.
-        previous_days_content_list = (
-            list(average_statistics_dict.values()))[current_day-moving_average_window_days:current_day]
-
-        # Compute the moving averages.
-        moving_average[day] = compute_average_for_current_day_from_past_x_days(previous_days_content_list)
-
-    return moving_average
-
-
-def compute_average_for_current_day_from_past_x_days(previous_days_content_list: list) -> dict:
-    """
-    This function computes the average for the current day from the past x days.
-
-    :param previous_days_content_list: list, the list of the previous days content.
-    :return: dict, the average dictionary.
-    """
-
-    moving_average: dict = {}
-    for entry in previous_days_content_list:
-        statistics_daily = entry['statistics_daily']
-        for host, host_dict in statistics_daily.items():
-            if host not in moving_average:
-                moving_average[host] = {
-                    'counts': [],
-                    'avg_request_sizes': [],
-                    'avg_response_sizes': [],
-                }
-
-            moving_average[host]['counts'].append(int(host_dict['count']))
-            moving_average[host]['avg_request_sizes'].append(float(host_dict['avg_request_size']))
-            moving_average[host]['avg_response_sizes'].append(float(host_dict['avg_response_size']))
-
-    # Compute the moving average.
-    moving_average_results: dict = {}
-    for host, host_dict in moving_average.items():
-        ma_count = statistics.mean(host_dict['counts'])
-        ma_request_size = statistics.mean(host_dict['avg_request_sizes'])
-        ma_response_size = statistics.mean(host_dict['avg_response_sizes'])
-
-        moving_average_results[host] = {
-            'ma_count': ma_count,
-            'ma_request_size': ma_request_size,
-            'ma_response_size': ma_response_size,
-            'counts': host_dict['counts'],
-            'avg_request_sizes': host_dict['avg_request_sizes'],
-            'avg_response_sizes': host_dict['avg_response_sizes']
-        }
-
-    return moving_average_results
-
-
-def find_deviation_from_moving_average(
-        statistics_content: dict,
-        top_bottom_deviation_percentage: float
-) -> list:
-    """
-    This function finds the deviation from the moving average to the bottom or top by specified percentage.
-
-    :param statistics_content: dict, the statistics content dictionary.
-    :param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
-        bottom.
-    :return: list, the deviation list.
-    """
-
-    def _check_deviation(
-            check_type: Literal['count', 'avg_request_size', 'avg_response_size'],
-            ma_check_type: Literal['ma_count', 'ma_request_size', 'ma_response_size'],
-            day_statistics_content_dict: dict,
-            moving_averages_dict: dict
-    ):
-        """
-        This function checks the deviation for the host.
-        """
-
-        nonlocal message
-
-        host_moving_average_by_type = moving_averages_dict[host][ma_check_type]
-        check_type_moving_by_percent = (
-            host_moving_average_by_type * top_bottom_deviation_percentage)
-        check_type_moving_above = host_moving_average_by_type + check_type_moving_by_percent
-        check_type_moving_below = host_moving_average_by_type - check_type_moving_by_percent
-
-        deviation_type = None
-        if day_statistics_content_dict[check_type] > check_type_moving_above:
-            deviation_type = 'above'
-        elif day_statistics_content_dict[check_type] < check_type_moving_below:
-            deviation_type = 'below'
-
-        if deviation_type:
-            message = f'[{check_type}] is [{deviation_type}] the moving average.'
-            deviation_list.append({
-                'day': day,
-                'host': host,
-                'message': message,
-                'value': day_statistics_content_dict[check_type],
-                'ma_value': host_moving_average_by_type,
-                'check_type': check_type,
-                'percentage': top_bottom_deviation_percentage,
-                'ma_value_checked': check_type_moving_above,
-                'deviation_type': deviation_type,
-                'data': day_statistics_content_dict,
-                'ma_data': moving_averages_dict[host]
-            })
-
-    deviation_list: list = []
-    for day_index, (day, day_dict) in enumerate(statistics_content.items()):
-        # If it's the first day, there is no previous day moving average.
-        if day_index == 0:
-            previous_day_moving_average_dict = {}
-        else:
-            previous_day_moving_average_dict = list(statistics_content.values())[day_index-1].get('moving_average', {})
-
-        # If there is no moving average for previous day continue to the next day.
-        if not previous_day_moving_average_dict:
-            continue
-
-        for host, host_dict in day_dict['statistics_daily'].items():
-            # If the host is not in the moving averages, then this is clear deviation.
-            # It means that in the current day, there were no requests for this host.
-            if host not in previous_day_moving_average_dict:
-                message = f'Host not in the moving averages: {host}'
-                deviation_list.append({
-                    'day': day,
-                    'host': host,
-                    'data': host_dict,
-                    'message': message,
-                    'type': 'clear'
-                })
-                continue
-
-            _check_deviation(
-                'count', 'ma_count', host_dict, previous_day_moving_average_dict)
-            _check_deviation(
-                'avg_request_size', 'ma_request_size', host_dict, previous_day_moving_average_dict)
-            _check_deviation(
-                'avg_response_size', 'ma_response_size', host_dict, previous_day_moving_average_dict)
-
-    return deviation_list
-
-
-def moving_average_calculator_main(
-        statistics_file_path: str,
+def deviation_calculator_by_moving_average_main(
+        statistics_file_directory: str,
         moving_average_window_days: int,
         top_bottom_deviation_percentage: float,
+        get_deviation_for_last_day_only: bool = False,
+        summary: bool = False,
         output_json_file_path: str = None
 ) -> Union[list, None]:
     """
     This function is the main function for the moving average calculator.
 
-    :param statistics_file_path: string, the statistics file path.
+    :param statistics_file_directory: string, the directory where 'statistics.csv' file resides.
+        Also, all the rotated files like: statistics_2021-01-01.csv, statistics_2021-01-02.csv, etc.
+        These will be analyzed in the order of the date in the file name.
     :param moving_average_window_days: integer, the moving average window days.
     :param top_bottom_deviation_percentage: float, the top bottom deviation percentage. Example: 0.1 for 10%.
+    :param get_deviation_for_last_day_only: bool, if True, only the last day will be analyzed.
+        Example: With 'moving_average_window_days=5', the last 6 days will be analyzed.
+        5 days for moving average and the last day for deviation.
+        File names example:
+            statistics_2021-01-01.csv
+            statistics_2021-01-02.csv
+            statistics_2021-01-03.csv
+            statistics_2021-01-04.csv
+            statistics_2021-01-05.csv
+            statistics_2021-01-06.csv
+        Files 01 to 05 will be used for moving average and the file 06 for deviation.
+        Meaning the average calculated for 2021-01-06 will be compared to the values moving average of 2021-01-01
+        to 2021-01-05.
+    :param summary: bool, if True, Only the summary will be generated without all the numbers that were used
+        to calculate the averages and the moving average data.
     :param output_json_file_path: string, if None, no json file will be written.
     -----------------------------
     :return: the deviation list of dicts.
@@ -813,6 +397,8 @@ def moving_average_calculator_main(
         sys.exit(main())
     """
 
+    statistics_file_path: str = f'{statistics_file_directory}{os.sep}{STATISTICS_FILE_NAME}'
+
     def convert_data_value_to_string(value_key: str, list_index: int) -> None:
         deviation_list[list_index]['data'][value_key] = json.dumps(deviation_list[list_index]['data'][value_key])
 
@@ -820,10 +406,11 @@ def moving_average_calculator_main(
         if value_key in deviation_list[list_index]:
             deviation_list[list_index][value_key] = json.dumps(deviation_list[list_index][value_key])
 
-    deviation_list = calculate_moving_average(
+    deviation_list = moving_average_helper.calculate_moving_average(
         statistics_file_path,
         moving_average_window_days,
-        top_bottom_deviation_percentage
+        top_bottom_deviation_percentage,
+        get_deviation_for_last_day_only
     )
 
     if deviation_list:
@@ -836,6 +423,20 @@ def moving_average_calculator_main(
             print_api(f'Deviation Found, saving to file: {output_json_file_path}', color='blue')
             jsons.write_json_file(deviation_list, output_json_file_path, use_default_indent=True)
 
+        if summary:
+            summary_deviation_list: list = []
+            for deviation in deviation_list:
+                summary_deviation_list.append({
+                    'day': deviation['day'],
+                    'host': deviation['host'],
+                    'message': deviation['message'],
+                    'value': deviation['value'],
+                    'ma_value': deviation['ma_value'],
+                    'total_entries_averaged': deviation['data']['count']
+                })
+
+            deviation_list = summary_deviation_list
+
         return deviation_list
 
     return None
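
As a usage note for the renamed entry point above: the sketch below shows one plausible way to call deviation_calculator_by_moving_average_main with the parameters introduced in this release. It is only a sketch based on the signature visible in this diff; the import path (atomicshop.mitm.statistic_analyzer), the example directory and the parameter values are assumptions, not taken from the package itself.

# Minimal usage sketch, not part of the package.
# Assumes the analyzer module is importable as below; adjust the import path to the real module location.
from atomicshop.mitm import statistic_analyzer  # hypothetical import path

deviations = statistic_analyzer.deviation_calculator_by_moving_average_main(
    statistics_file_directory='/var/log/mitm_stats',  # directory containing statistics.csv and its rotated files
    moving_average_window_days=5,                     # average each host over the previous 5 daily files
    top_bottom_deviation_percentage=0.1,              # flag values more than 10% above or below the moving average
    get_deviation_for_last_day_only=True,             # new in 2.14.13: analyze only the newest day against the window
    summary=True,                                     # new in 2.14.13: return only the summarized deviation fields
    output_json_file_path=None                        # or a path to also write the deviations as JSON
)

for deviation in deviations or []:
    print(deviation['day'], deviation['host'], deviation['message'])

Per the added lines in the last hunks, the function joins statistics_file_directory with the fixed STATISTICS_FILE_NAME ('statistics.csv') using os.sep, and with summary=True each returned entry keeps only the day, host, message, value, ma_value and total_entries_averaged fields.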