atomicshop 2.14.11__py3-none-any.whl → 2.14.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of atomicshop might be problematic. Click here for more details.

@@ -1,147 +1,17 @@
1
1
  import os
2
2
  import datetime
3
- import statistics
4
3
  import json
5
- from typing import Literal
4
+ from typing import Union
6
5
 
6
+ from .statistic_analyzer_helper import analyzer_helper, moving_average_helper
7
7
  from .. import filesystem, domains, datetimes, urls
8
8
  from ..basics import dicts
9
- from ..file_io import tomls, xlsxs, csvs, jsons
9
+ from ..file_io import tomls, xlsxs, jsons
10
10
  from ..wrappers.loggingw import reading
11
11
  from ..print_api import print_api
12
12
 
13
13
 
14
- def get_the_last_day_number(statistics_content: list, stop_after_lines: int = None) -> int:
15
- """
16
- This function gets the last day number from the statistics content.
17
-
18
- :param statistics_content: list, of lines in the statistics content.
19
- :param stop_after_lines: integer, if specified, the function will stop after the specified number of lines.
20
- :return: integer, the last day number.
21
- """
22
-
23
- last_day_number = None
24
- start_time_temp = None
25
- for line_index, line in enumerate(statistics_content):
26
- try:
27
- request_time = datetime.datetime.strptime(line['request_time_sent'], '%Y-%m-%d %H:%M:%S.%f')
28
- except ValueError:
29
- continue
30
-
31
- if not start_time_temp:
32
- start_time_temp = request_time
33
-
34
- if stop_after_lines:
35
- if line_index == stop_after_lines:
36
- break
37
-
38
- last_day_number = datetimes.get_difference_between_dates_in_days(start_time_temp, request_time)
39
- return last_day_number
40
-
41
-
42
- def create_empty_features_dict() -> dict:
43
- """
44
- This function creates an empty dictionary for the daily stats. This should be initiated for each 'host_type' of:
45
- 'domain', 'subdomain', 'url_no_parameters'.
46
- :return: dict
47
- """
48
-
49
- return {
50
- 'total_count': {}, 'normal_count': {}, 'error_count': {},
51
- 'request_0_byte_count': {}, 'response_0_byte_count': {},
52
- 'request_sizes_list': {}, 'response_sizes_list': {},
53
- 'request_sizes_no_0_bytes_list': {}, 'response_sizes_no_0_bytes_list': {},
54
- 'average_request_size': {}, 'average_response_size': {},
55
- 'average_request_size_no_0_bytes': {}, 'average_response_size_no_0_bytes': {}}
56
-
57
-
58
- def add_to_count_to_daily_stats(
59
- daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str) -> None:
60
- """
61
- This function adds 1 to the 'count' feature of the current day in the daily stats.
62
-
63
- :param daily_stats: dict, the daily statistics dict.
64
- :param current_day: integer, the current day number.
65
- :param last_day: integer, the last day number.
66
- :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
67
- :param feature: string, the feature to add the count to. Can be: 'total_count', 'normal_count', 'error_count',
68
- 'request_0_byte_count', 'response_0_byte_count'.
69
- :param host_name: string, the name of the host.
70
-
71
- :return: None.
72
- """
73
-
74
- # Aggregate daily domain hits.
75
- if host_name not in daily_stats[host_type][feature].keys():
76
- daily_stats[host_type][feature][host_name] = {}
77
- # Iterate from first day to the last day.
78
- for day in range(0, last_day + 1):
79
- daily_stats[host_type][feature][host_name][day] = 0
80
-
81
- # Add count to current day.
82
- daily_stats[host_type][feature][host_name][current_day] += 1
83
-
84
-
85
- def add_to_list_to_daily_stats(
86
- daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
87
- size: float) -> None:
88
- """
89
- This function adds the 'size' to the 'feature' list of the current day in the daily stats.
90
-
91
- :param daily_stats: dict, the daily statistics dict.
92
- :param current_day: integer, the current day number.
93
- :param last_day: integer, the last day number.
94
- :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
95
- :param feature: string, the feature to add the count to. Can be: 'request_sizes_list', 'response_sizes_list',
96
- 'request_sizes_no_0_bytes_list', 'response_sizes_no_0_bytes_list'.
97
- :param host_name: string, the name of the host.
98
- :param size: float, the size in bytes to add to the list.
99
-
100
- :return: None.
101
- """
102
-
103
- # Aggregate daily domain hits.
104
- if host_name not in daily_stats[host_type][feature].keys():
105
- daily_stats[host_type][feature][host_name] = {}
106
- # Iterate from first day to the last day.
107
- for day in range(0, last_day + 1):
108
- daily_stats[host_type][feature][host_name][day] = []
109
-
110
- # Add count to current day.
111
- daily_stats[host_type][feature][host_name][current_day].append(size)
112
-
113
-
114
- def add_to_average_to_daily_stats(
115
- daily_stats: dict, current_day: int, last_day: int, host_type: str, feature: str, host_name: str,
116
- list_of_sizes: list) -> None:
117
- """
118
- This function adds the average size in bytes calculated from the 'list_of_sizes' to the 'feature' of the current
119
- day in the daily stats.
120
-
121
- :param daily_stats: dict, the daily statistics dict.
122
- :param current_day: integer, the current day number.
123
- :param last_day: integer, the last day number.
124
- :param host_type: string, the type of the host. Can be: 'domain', 'subdomain', 'url_no_parameters'.
125
- :param feature: string, the feature to add the count to. Can be: 'average_request_size', 'average_response_size',
126
- 'average_request_size_no_0_bytes', 'average_response_size_no_0_bytes'.
127
- :param host_name: string, the name of the host.
128
- :param list_of_sizes: list, the list of sizes to calculate the average from.
129
-
130
- :return: None.
131
- """
132
-
133
- # Aggregate daily domain hits.
134
- if host_name not in daily_stats[host_type][feature].keys():
135
- daily_stats[host_type][feature][host_name] = {}
136
- # Iterate from first day to the last day.
137
- for day in range(0, last_day + 1):
138
- daily_stats[host_type][feature][host_name][day] = 0
139
-
140
- # If the list of size is empty, add 0 to the average, since we cannot divide by 0.
141
- if len(list_of_sizes) == 0:
142
- daily_stats[host_type][feature][host_name][current_day] = 0
143
- else:
144
- daily_stats[host_type][feature][host_name][current_day] = sum(list_of_sizes) / len(list_of_sizes)
14
+ STATISTICS_FILE_NAME: str = 'statistics.csv'
145
15
 
146
16
 
147
17
  def analyze(main_file_path: str):
@@ -173,9 +43,9 @@ def analyze(main_file_path: str):
173
43
  'subdomain': {'total_count': {}, 'normal_count': {}, 'error_count': {}}
174
44
  }
175
45
  daily_stats: dict = {
176
- 'domain': create_empty_features_dict(),
177
- 'subdomain': create_empty_features_dict(),
178
- 'url_no_parameters': create_empty_features_dict()
46
+ 'domain': analyzer_helper.create_empty_features_dict(),
47
+ 'subdomain': analyzer_helper.create_empty_features_dict(),
48
+ 'url_no_parameters': analyzer_helper.create_empty_features_dict()
179
49
  }
180
50
 
181
51
  # Start the main loop.
@@ -196,7 +66,7 @@ def analyze(main_file_path: str):
196
66
 
197
67
  # Find the last day number. If 'break_after_lines' is specified, the loop will stop after the specified line.
198
68
  if not last_day_number:
199
- last_day_number = get_the_last_day_number(statistics_content, break_after_lines)
69
+ last_day_number = analyzer_helper.get_the_last_day_number(statistics_content, break_after_lines)
200
70
 
201
71
  if break_after_lines:
202
72
  if line_index == break_after_lines:
@@ -296,87 +166,87 @@ def analyze(main_file_path: str):
296
166
  day_number = datetimes.get_difference_between_dates_in_days(start_time, request_time)
297
167
 
298
168
  # Add 1 to the total count of the current day.
299
- add_to_count_to_daily_stats(
169
+ analyzer_helper.add_to_count_to_daily_stats(
300
170
  daily_stats, day_number, last_day_number, 'domain', 'total_count', main_domain)
301
- add_to_count_to_daily_stats(
171
+ analyzer_helper.add_to_count_to_daily_stats(
302
172
  daily_stats, day_number, last_day_number, 'subdomain', 'total_count', subdomain)
303
- add_to_count_to_daily_stats(
173
+ analyzer_helper.add_to_count_to_daily_stats(
304
174
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'total_count', url_no_parameters)
305
175
 
306
176
  # Handle line if it has error.
307
177
  if line['error'] != '':
308
- add_to_count_to_daily_stats(
178
+ analyzer_helper.add_to_count_to_daily_stats(
309
179
  daily_stats, day_number, last_day_number, 'domain', 'error_count', main_domain)
310
- add_to_count_to_daily_stats(
180
+ analyzer_helper.add_to_count_to_daily_stats(
311
181
  daily_stats, day_number, last_day_number, 'subdomain', 'error_count', subdomain)
312
- add_to_count_to_daily_stats(
182
+ analyzer_helper.add_to_count_to_daily_stats(
313
183
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'error_count', url_no_parameters)
314
184
  else:
315
- add_to_count_to_daily_stats(
185
+ analyzer_helper.add_to_count_to_daily_stats(
316
186
  daily_stats, day_number, last_day_number, 'domain', 'normal_count', main_domain)
317
- add_to_count_to_daily_stats(
187
+ analyzer_helper.add_to_count_to_daily_stats(
318
188
  daily_stats, day_number, last_day_number, 'subdomain', 'normal_count', subdomain)
319
- add_to_count_to_daily_stats(
189
+ analyzer_helper.add_to_count_to_daily_stats(
320
190
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'normal_count', url_no_parameters)
321
191
 
322
192
  if request_size == 0:
323
- add_to_count_to_daily_stats(
193
+ analyzer_helper.add_to_count_to_daily_stats(
324
194
  daily_stats, day_number, last_day_number, 'domain', 'request_0_byte_count',
325
195
  main_domain)
326
- add_to_count_to_daily_stats(
196
+ analyzer_helper.add_to_count_to_daily_stats(
327
197
  daily_stats, day_number, last_day_number, 'subdomain', 'request_0_byte_count',
328
198
  subdomain)
329
- add_to_count_to_daily_stats(
199
+ analyzer_helper.add_to_count_to_daily_stats(
330
200
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_0_byte_count',
331
201
  url_no_parameters)
332
202
 
333
203
  if response_size == 0:
334
- add_to_count_to_daily_stats(
204
+ analyzer_helper.add_to_count_to_daily_stats(
335
205
  daily_stats, day_number, last_day_number, 'domain', 'response_0_byte_count',
336
206
  main_domain)
337
- add_to_count_to_daily_stats(
207
+ analyzer_helper.add_to_count_to_daily_stats(
338
208
  daily_stats, day_number, last_day_number, 'subdomain', 'response_0_byte_count',
339
209
  subdomain)
340
- add_to_count_to_daily_stats(
210
+ analyzer_helper.add_to_count_to_daily_stats(
341
211
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_0_byte_count',
342
212
  url_no_parameters)
343
213
 
344
214
  if request_size is not None and response_size is not None:
345
- add_to_list_to_daily_stats(
215
+ analyzer_helper.add_to_list_to_daily_stats(
346
216
  daily_stats, day_number, last_day_number, 'domain', 'request_sizes_list', main_domain, request_size)
347
- add_to_list_to_daily_stats(
217
+ analyzer_helper.add_to_list_to_daily_stats(
348
218
  daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_list', subdomain, request_size)
349
- add_to_list_to_daily_stats(
219
+ analyzer_helper.add_to_list_to_daily_stats(
350
220
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_list', url_no_parameters,
351
221
  request_size)
352
222
 
353
- add_to_list_to_daily_stats(
223
+ analyzer_helper.add_to_list_to_daily_stats(
354
224
  daily_stats, day_number, last_day_number, 'domain', 'response_sizes_list', main_domain, response_size)
355
- add_to_list_to_daily_stats(
225
+ analyzer_helper.add_to_list_to_daily_stats(
356
226
  daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_list', subdomain, response_size)
357
- add_to_list_to_daily_stats(
227
+ analyzer_helper.add_to_list_to_daily_stats(
358
228
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_list', url_no_parameters,
359
229
  response_size)
360
230
 
361
231
  if request_size != 0 and request_size is not None:
362
- add_to_list_to_daily_stats(
232
+ analyzer_helper.add_to_list_to_daily_stats(
363
233
  daily_stats, day_number, last_day_number, 'domain', 'request_sizes_no_0_bytes_list',
364
234
  main_domain, request_size)
365
- add_to_list_to_daily_stats(
235
+ analyzer_helper.add_to_list_to_daily_stats(
366
236
  daily_stats, day_number, last_day_number, 'subdomain', 'request_sizes_no_0_bytes_list',
367
237
  subdomain, request_size)
368
- add_to_list_to_daily_stats(
238
+ analyzer_helper.add_to_list_to_daily_stats(
369
239
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'request_sizes_no_0_bytes_list',
370
240
  url_no_parameters, request_size)
371
241
 
372
242
  if response_size != 0 and response_size is not None:
373
- add_to_list_to_daily_stats(
243
+ analyzer_helper.add_to_list_to_daily_stats(
374
244
  daily_stats, day_number, last_day_number, 'domain', 'response_sizes_no_0_bytes_list',
375
245
  main_domain, response_size)
376
- add_to_list_to_daily_stats(
246
+ analyzer_helper.add_to_list_to_daily_stats(
377
247
  daily_stats, day_number, last_day_number, 'subdomain', 'response_sizes_no_0_bytes_list',
378
248
  subdomain, response_size)
379
- add_to_list_to_daily_stats(
249
+ analyzer_helper.add_to_list_to_daily_stats(
380
250
  daily_stats, day_number, last_day_number, 'url_no_parameters', 'response_sizes_no_0_bytes_list',
381
251
  url_no_parameters, response_size)
382
252
 
@@ -398,7 +268,7 @@ def analyze(main_file_path: str):
398
268
 
399
269
  for host_name, days in hosts.items():
400
270
  for day, sizes in days.items():
401
- add_to_average_to_daily_stats(
271
+ analyzer_helper.add_to_average_to_daily_stats(
402
272
  daily_stats, day, last_day_number, host_type, feature_name, host_name, sizes)
403
273
 
404
274
  # Sorting overall stats.
@@ -474,327 +344,40 @@ def analyze(main_file_path: str):
474
344
  # ======================================================================================================================
475
345
 
476
346
 
477
- def calculate_moving_average(
478
- file_path: str,
479
- moving_average_window_days,
480
- top_bottom_deviation_percentage: float,
481
- print_kwargs: dict = None
482
- ):
483
- """
484
- This function calculates the moving average of the daily statistics.
485
-
486
- :param file_path: string, the path to the 'statistics.csv' file.
487
- :param moving_average_window_days: integer, the window size for the moving average.
488
- :param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
489
- bottom.
490
- :param print_kwargs: dict, the print_api arguments.
491
- """
492
-
493
- date_pattern: str = '%Y_%m_%d'
494
-
495
- # Get all the file paths and their midnight rotations.
496
- logs_paths: list = reading.get_logs_paths(
497
- log_file_path=file_path,
498
- date_pattern=date_pattern
499
- )
500
-
501
- statistics_content: dict = {}
502
- # Read each file to its day.
503
- for log_path_dict in logs_paths:
504
- date_string = log_path_dict['date_string']
505
- statistics_content[date_string] = {}
506
-
507
- statistics_content[date_string]['file'] = log_path_dict
508
-
509
- log_file_content, log_file_header = (
510
- csvs.read_csv_to_list_of_dicts_by_header(log_path_dict['file_path'], **(print_kwargs or {})))
511
- statistics_content[date_string]['content'] = log_file_content
512
- statistics_content[date_string]['header'] = log_file_header
513
-
514
- statistics_content[date_string]['content_no_errors'] = get_content_without_errors(log_file_content)
515
-
516
- # Get the data dictionary from the statistics content.
517
- statistics_content[date_string]['statistics_daily'] = compute_statistics_from_content(
518
- statistics_content[date_string]['content_no_errors']
519
- )
520
-
521
- moving_average_dict: dict = compute_moving_averages_from_average_statistics(
522
- statistics_content,
523
- moving_average_window_days
524
- )
525
-
526
- # Add the moving average to the statistics content.
527
- for day, day_dict in statistics_content.items():
528
- try:
529
- day_dict['moving_average'] = moving_average_dict[day]
530
- except KeyError:
531
- day_dict['moving_average'] = {}
532
-
533
- # Find deviation from the moving average to the bottom or top by specified percentage.
534
- deviation_list: list = find_deviation_from_moving_average(
535
- statistics_content, top_bottom_deviation_percentage)
536
-
537
- return deviation_list
538
-
539
-
540
- def get_content_without_errors(content: list) -> list:
541
- """
542
- This function gets the 'statistics.csv' file content without errors from the 'content' list.
543
-
544
- :param content: list, the content list.
545
- :return: list, the content without errors.
546
- """
547
-
548
- traffic_statistics_without_errors: list = []
549
- for line in content:
550
- # Skip empty lines, headers and errors.
551
- if line['host'] == 'host' or line['command'] == '':
552
- continue
553
-
554
- traffic_statistics_without_errors.append(line)
555
-
556
- return traffic_statistics_without_errors
557
-
558
-
559
- def get_data_dict_from_statistics_content(content: list) -> dict:
560
- """
561
- This function gets the data dictionary from the 'statistics.csv' file content.
562
-
563
- :param content: list, the content list.
564
- :return: dict, the data dictionary.
565
- """
566
-
567
- hosts_requests_responses: dict = {}
568
- for line in content:
569
- # If subdomain is not in the dictionary, add it.
570
- if line['host'] not in hosts_requests_responses:
571
- hosts_requests_responses[line['host']] = {
572
- 'request_sizes': [],
573
- 'response_sizes': []
574
- }
575
-
576
- # Append the sizes.
577
- try:
578
- hosts_requests_responses[line['host']]['request_sizes'].append(int(line['request_size_bytes']))
579
- hosts_requests_responses[line['host']]['response_sizes'].append(
580
- int(line['response_size_bytes']))
581
- except ValueError:
582
- print_api(line, color='yellow')
583
- raise
584
-
585
- return hosts_requests_responses
586
-
587
-
588
- def compute_statistics_from_data_dict(data_dict: dict):
589
- """
590
- This function computes the statistics from the data dictionary.
591
-
592
- :param data_dict: dict, the data dictionary.
593
- :return: dict, the statistics dictionary.
594
- """
595
-
596
- for host, host_dict in data_dict.items():
597
- count = len(host_dict['request_sizes'])
598
- avg_request_size = statistics.mean(host_dict['request_sizes']) if count > 0 else 0
599
- median_request_size = statistics.median(host_dict['request_sizes']) if count > 0 else 0
600
- avg_response_size = statistics.mean(host_dict['response_sizes']) if count > 0 else 0
601
- median_response_size = statistics.median(host_dict['response_sizes']) if count > 0 else 0
602
-
603
- data_dict[host]['count'] = count
604
- data_dict[host]['avg_request_size'] = avg_request_size
605
- data_dict[host]['median_request_size'] = median_request_size
606
- data_dict[host]['avg_response_size'] = avg_response_size
607
- data_dict[host]['median_response_size'] = median_response_size
608
-
609
-
610
- def compute_statistics_from_content(content: list):
611
- """
612
- This function computes the statistics from the 'statistics.csv' file content.
613
-
614
- :param content: list, the content list.
615
- :return: dict, the statistics dictionary.
616
- """
617
-
618
- hosts_requests_responses: dict = get_data_dict_from_statistics_content(content)
619
- compute_statistics_from_data_dict(hosts_requests_responses)
620
-
621
- return hosts_requests_responses
622
-
623
-
624
- def compute_moving_averages_from_average_statistics(
625
- average_statistics_dict: dict,
626
- moving_average_window_days: int
627
- ):
628
- """
629
- This function computes the moving averages from the average statistics dictionary.
630
-
631
- :param average_statistics_dict: dict, the average statistics dictionary.
632
- :param moving_average_window_days: integer, the window size for the moving average.
633
- :return: dict, the moving averages dictionary.
634
- """
635
-
636
- moving_average: dict = {}
637
- for day_index, (day, day_dict) in enumerate(average_statistics_dict.items()):
638
- current_day = day_index + 1
639
- if current_day < moving_average_window_days:
640
- continue
641
-
642
- # Create list of the previous 'moving_average_window_days' days.
643
- previous_days_content_list = (
644
- list(average_statistics_dict.values()))[current_day-moving_average_window_days:current_day]
645
-
646
- # Compute the moving averages.
647
- moving_average[day] = compute_average_for_current_day_from_past_x_days(previous_days_content_list)
648
-
649
- return moving_average
650
-
651
-
652
- def compute_average_for_current_day_from_past_x_days(previous_days_content_list: list) -> dict:
653
- """
654
- This function computes the average for the current day from the past x days.
655
-
656
- :param previous_days_content_list: list, the list of the previous days content.
657
- :return: dict, the average dictionary.
658
- """
659
-
660
- moving_average: dict = {}
661
- for entry in previous_days_content_list:
662
- statistics_daily = entry['statistics_daily']
663
- for host, host_dict in statistics_daily.items():
664
- if host not in moving_average:
665
- moving_average[host] = {
666
- 'counts': [],
667
- 'avg_request_sizes': [],
668
- 'avg_response_sizes': [],
669
- }
670
-
671
- moving_average[host]['counts'].append(int(host_dict['count']))
672
- moving_average[host]['avg_request_sizes'].append(float(host_dict['avg_request_size']))
673
- moving_average[host]['avg_response_sizes'].append(float(host_dict['avg_response_size']))
674
-
675
- # Compute the moving average.
676
- moving_average_results: dict = {}
677
- for host, host_dict in moving_average.items():
678
- ma_count = statistics.mean(host_dict['counts'])
679
- ma_request_size = statistics.mean(host_dict['avg_request_sizes'])
680
- ma_response_size = statistics.mean(host_dict['avg_response_sizes'])
681
-
682
- moving_average_results[host] = {
683
- 'ma_count': ma_count,
684
- 'ma_request_size': ma_request_size,
685
- 'ma_response_size': ma_response_size,
686
- 'counts': host_dict['counts'],
687
- 'avg_request_sizes': host_dict['avg_request_sizes'],
688
- 'avg_response_sizes': host_dict['avg_response_sizes']
689
- }
690
-
691
- return moving_average_results
692
-
693
-
694
- def find_deviation_from_moving_average(
695
- statistics_content: dict,
696
- top_bottom_deviation_percentage: float
697
- ) -> list:
698
- """
699
- This function finds the deviation from the moving average to the bottom or top by specified percentage.
700
-
701
- :param statistics_content: dict, the statistics content dictionary.
702
- :param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
703
- bottom.
704
- :return: list, the deviation list.
705
- """
706
-
707
- def _check_deviation(
708
- check_type: Literal['count', 'avg_request_size', 'avg_response_size'],
709
- ma_check_type: Literal['ma_count', 'ma_request_size', 'ma_response_size'],
710
- day_statistics_content_dict: dict,
711
- moving_averages_dict: dict
712
- ):
713
- """
714
- This function checks the deviation for the host.
715
- """
716
-
717
- nonlocal message
718
-
719
- host_moving_average_by_type = moving_averages_dict[host][ma_check_type]
720
- check_type_moving_by_percent = (
721
- host_moving_average_by_type * top_bottom_deviation_percentage)
722
- check_type_moving_above = host_moving_average_by_type + check_type_moving_by_percent
723
- check_type_moving_below = host_moving_average_by_type - check_type_moving_by_percent
724
-
725
- deviation_type = None
726
- if day_statistics_content_dict[check_type] > check_type_moving_above:
727
- deviation_type = 'above'
728
- elif day_statistics_content_dict[check_type] < check_type_moving_below:
729
- deviation_type = 'below'
730
-
731
- if deviation_type:
732
- message = f'[{check_type}] is [{deviation_type}] the moving average.'
733
- deviation_list.append({
734
- 'day': day,
735
- 'host': host,
736
- 'message': message,
737
- 'value': day_statistics_content_dict[check_type],
738
- 'ma_value': host_moving_average_by_type,
739
- 'check_type': check_type,
740
- 'percentage': top_bottom_deviation_percentage,
741
- 'ma_value_checked': check_type_moving_above,
742
- 'deviation_type': deviation_type,
743
- 'data': day_statistics_content_dict,
744
- 'ma_data': moving_averages_dict[host]
745
- })
746
-
747
- deviation_list: list = []
748
- for day_index, (day, day_dict) in enumerate(statistics_content.items()):
749
- # If it's the first day, there is no previous day moving average.
750
- if day_index == 0:
751
- previous_day_moving_average_dict = {}
752
- else:
753
- previous_day_moving_average_dict = list(statistics_content.values())[day_index-1].get('moving_average', {})
754
-
755
- # If there is no moving average for previous day continue to the next day.
756
- if not previous_day_moving_average_dict:
757
- continue
758
-
759
- for host, host_dict in day_dict['statistics_daily'].items():
760
- # If the host is not in the moving averages, then this is clear deviation.
761
- # It means that in the current day, there were no requests for this host.
762
- if host not in previous_day_moving_average_dict:
763
- message = f'Host not in the moving averages: {host}'
764
- deviation_list.append({
765
- 'day': day,
766
- 'host': host,
767
- 'data': host_dict,
768
- 'message': message,
769
- 'type': 'clear'
770
- })
771
- continue
772
-
773
- _check_deviation(
774
- 'count', 'ma_count', host_dict, previous_day_moving_average_dict)
775
- _check_deviation(
776
- 'avg_request_size', 'ma_request_size', host_dict, previous_day_moving_average_dict)
777
- _check_deviation(
778
- 'avg_response_size', 'ma_response_size', host_dict, previous_day_moving_average_dict)
779
-
780
- return deviation_list
781
-
782
-
783
- def moving_average_calculator_main(
784
- statistics_file_path: str,
785
- output_directory: str,
347
+ def deviation_calculator_by_moving_average_main(
348
+ statistics_file_directory: str,
786
349
  moving_average_window_days: int,
787
- top_bottom_deviation_percentage: float
788
- ) -> int:
350
+ top_bottom_deviation_percentage: float,
351
+ get_deviation_for_last_day_only: bool = False,
352
+ summary: bool = False,
353
+ output_json_file_path: str = None
354
+ ) -> Union[list, None]:
789
355
  """
790
356
  This function is the main function for the moving average calculator.
791
357
 
792
- :param statistics_file_path: string, the statistics file path.
793
- :param output_directory: string, the output directory.
358
+ :param statistics_file_directory: string, the directory where 'statistics.csv' file resides.
359
+ Also, all the rotated files like: statistics_2021-01-01.csv, statistics_2021-01-02.csv, etc.
360
+ These will be analyzed in the order of the date in the file name.
794
361
  :param moving_average_window_days: integer, the moving average window days.
795
362
  :param top_bottom_deviation_percentage: float, the top bottom deviation percentage. Example: 0.1 for 10%.
796
- :return: integer, the return code.
363
+ :param get_deviation_for_last_day_only: bool, if True, only the last day will be checked for deviation.
364
+ Example: With 'moving_average_window_days=5', the last 6 days will be read:
365
+ 5 days for the moving average and the last day for the deviation check.
366
+ File names example:
367
+ statistics_2021-01-01.csv
368
+ statistics_2021-01-02.csv
369
+ statistics_2021-01-03.csv
370
+ statistics_2021-01-04.csv
371
+ statistics_2021-01-05.csv
372
+ statistics_2021-01-06.csv
373
+ Files 01 to 05 will be used for moving average and the file 06 for deviation.
374
+ Meaning the averages calculated for 2021-01-06 will be compared to the moving average of the values from 2021-01-01
375
+ to 2021-01-05.
376
+ :param summary: bool, if True, only the summary will be generated, without all the numbers that were used
377
+ to calculate the averages and without the moving average data.
378
+ :param output_json_file_path: string, if None, no json file will be written.
797
379
  -----------------------------
380
+ :return: the deviation list of dicts.
798
381
 
799
382
  Example:
800
383
  import sys
@@ -804,9 +387,9 @@ def moving_average_calculator_main(
804
387
  def main():
805
388
  return statistic_analyzer.deviation_calculator_by_moving_average_main(
806
389
  statistics_file_directory='C:\\statistics',
807
- output_directory='output',
808
390
  moving_average_window_days=7,
809
- top_bottom_deviation_percentage=0.1
391
+ top_bottom_deviation_percentage=0.1,
392
+ output_json_file_path='C:\\output\\deviation_list.json'
810
393
  )
811
394
 
812
395
 
@@ -814,6 +397,8 @@ def moving_average_calculator_main(
814
397
  sys.exit(main())
815
398
  """
816
399
 
400
+ statistics_file_path: str = f'{statistics_file_directory}{os.sep}{STATISTICS_FILE_NAME}'
401
+
817
402
  def convert_data_value_to_string(value_key: str, list_index: int) -> None:
818
403
  deviation_list[list_index]['data'][value_key] = json.dumps(deviation_list[list_index]['data'][value_key])
819
404
 
@@ -821,20 +406,37 @@ def moving_average_calculator_main(
821
406
  if value_key in deviation_list[list_index]:
822
407
  deviation_list[list_index][value_key] = json.dumps(deviation_list[list_index][value_key])
823
408
 
824
- deviation_list = calculate_moving_average(
409
+ deviation_list = moving_average_helper.calculate_moving_average(
825
410
  statistics_file_path,
826
411
  moving_average_window_days,
827
- top_bottom_deviation_percentage
412
+ top_bottom_deviation_percentage,
413
+ get_deviation_for_last_day_only
828
414
  )
829
415
 
830
416
  if deviation_list:
831
- for deviation_list_index, deviation in enumerate(deviation_list):
832
- convert_data_value_to_string('request_sizes', deviation_list_index)
833
- convert_data_value_to_string('response_sizes', deviation_list_index)
834
- convert_value_to_string('ma_data', deviation_list_index)
417
+ if output_json_file_path:
418
+ for deviation_list_index, deviation in enumerate(deviation_list):
419
+ convert_data_value_to_string('request_sizes', deviation_list_index)
420
+ convert_data_value_to_string('response_sizes', deviation_list_index)
421
+ convert_value_to_string('ma_data', deviation_list_index)
422
+
423
+ print_api(f'Deviation Found, saving to file: {output_json_file_path}', color='blue')
424
+ jsons.write_json_file(deviation_list, output_json_file_path, use_default_indent=True)
425
+
426
+ if summary:
427
+ summary_deviation_list: list = []
428
+ for deviation in deviation_list:
429
+ summary_deviation_list.append({
430
+ 'day': deviation['day'],
431
+ 'host': deviation['host'],
432
+ 'message': deviation['message'],
433
+ 'value': deviation['value'],
434
+ 'ma_value': deviation['ma_value'],
435
+ 'total_entries_averaged': deviation['data']['count']
436
+ })
437
+
438
+ deviation_list = summary_deviation_list
835
439
 
836
- file_path = output_directory + os.sep + 'deviation.json'
837
- print_api(f'Deviation Found, saving to file: {file_path}', color='blue')
838
- jsons.write_json_file(deviation_list, file_path, use_default_indent=True)
440
+ return deviation_list
839
441
 
840
- return 0
442
+ return None