atomicshop 2.11.47__py3-none-any.whl → 3.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicshop/__init__.py +1 -1
- atomicshop/{addons/mains → a_mains}/FACT/update_extract.py +3 -2
- atomicshop/a_mains/addons/process_list/compile.cmd +7 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/{addons → a_mains/addons}/process_list/process_list.cpp +8 -1
- atomicshop/a_mains/dns_gateway_setting.py +11 -0
- atomicshop/a_mains/get_local_tcp_ports.py +85 -0
- atomicshop/a_mains/github_wrapper.py +11 -0
- atomicshop/a_mains/install_ca_certificate.py +172 -0
- atomicshop/{addons/mains → a_mains}/msi_unpacker.py +3 -1
- atomicshop/a_mains/process_from_port.py +119 -0
- atomicshop/a_mains/set_default_dns_gateway.py +90 -0
- atomicshop/a_mains/update_config_toml.py +38 -0
- atomicshop/appointment_management.py +5 -3
- atomicshop/basics/ansi_escape_codes.py +3 -1
- atomicshop/basics/argparse_template.py +2 -0
- atomicshop/basics/booleans.py +27 -30
- atomicshop/basics/bytes_arrays.py +43 -0
- atomicshop/basics/classes.py +149 -1
- atomicshop/basics/dicts.py +12 -0
- atomicshop/basics/enums.py +2 -2
- atomicshop/basics/exceptions.py +5 -1
- atomicshop/basics/list_of_classes.py +29 -0
- atomicshop/basics/list_of_dicts.py +69 -5
- atomicshop/basics/lists.py +14 -0
- atomicshop/basics/multiprocesses.py +374 -50
- atomicshop/basics/package_module.py +10 -0
- atomicshop/basics/strings.py +160 -7
- atomicshop/basics/threads.py +14 -0
- atomicshop/basics/tracebacks.py +13 -4
- atomicshop/certificates.py +153 -52
- atomicshop/config_init.py +12 -7
- atomicshop/console_user_response.py +7 -14
- atomicshop/consoles.py +9 -0
- atomicshop/datetimes.py +98 -0
- atomicshop/diff_check.py +340 -40
- atomicshop/dns.py +128 -12
- atomicshop/etws/_pywintrace_fix.py +17 -0
- atomicshop/etws/const.py +38 -0
- atomicshop/etws/providers.py +21 -0
- atomicshop/etws/sessions.py +43 -0
- atomicshop/etws/trace.py +168 -0
- atomicshop/etws/traces/trace_dns.py +162 -0
- atomicshop/etws/traces/trace_sysmon_process_creation.py +126 -0
- atomicshop/etws/traces/trace_tcp.py +130 -0
- atomicshop/file_io/csvs.py +222 -24
- atomicshop/file_io/docxs.py +35 -18
- atomicshop/file_io/file_io.py +35 -19
- atomicshop/file_io/jsons.py +49 -0
- atomicshop/file_io/tomls.py +139 -0
- atomicshop/filesystem.py +864 -293
- atomicshop/get_process_list.py +133 -0
- atomicshop/{process_name_cmd.py → get_process_name_cmd_dll.py} +52 -19
- atomicshop/http_parse.py +149 -93
- atomicshop/ip_addresses.py +6 -1
- atomicshop/mitm/centered_settings.py +132 -0
- atomicshop/mitm/config_static.py +207 -0
- atomicshop/mitm/config_toml_editor.py +55 -0
- atomicshop/mitm/connection_thread_worker.py +875 -357
- atomicshop/mitm/engines/__parent/parser___parent.py +4 -17
- atomicshop/mitm/engines/__parent/recorder___parent.py +108 -51
- atomicshop/mitm/engines/__parent/requester___parent.py +116 -0
- atomicshop/mitm/engines/__parent/responder___parent.py +75 -114
- atomicshop/mitm/engines/__reference_general/parser___reference_general.py +10 -7
- atomicshop/mitm/engines/__reference_general/recorder___reference_general.py +5 -5
- atomicshop/mitm/engines/__reference_general/requester___reference_general.py +47 -0
- atomicshop/mitm/engines/__reference_general/responder___reference_general.py +95 -13
- atomicshop/mitm/engines/create_module_template.py +58 -14
- atomicshop/mitm/import_config.py +359 -139
- atomicshop/mitm/initialize_engines.py +160 -74
- atomicshop/mitm/message.py +64 -23
- atomicshop/mitm/mitm_main.py +892 -0
- atomicshop/mitm/recs_files.py +183 -0
- atomicshop/mitm/shared_functions.py +4 -10
- atomicshop/mitm/ssh_tester.py +82 -0
- atomicshop/mitm/statistic_analyzer.py +257 -166
- atomicshop/mitm/statistic_analyzer_helper/analyzer_helper.py +136 -0
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +525 -0
- atomicshop/monitor/change_monitor.py +96 -120
- atomicshop/monitor/checks/dns.py +139 -70
- atomicshop/monitor/checks/file.py +77 -0
- atomicshop/monitor/checks/network.py +81 -77
- atomicshop/monitor/checks/process_running.py +33 -34
- atomicshop/monitor/checks/url.py +94 -0
- atomicshop/networks.py +671 -0
- atomicshop/on_exit.py +205 -0
- atomicshop/package_mains_processor.py +84 -0
- atomicshop/permissions/permissions.py +22 -0
- atomicshop/permissions/ubuntu_permissions.py +239 -0
- atomicshop/permissions/win_permissions.py +33 -0
- atomicshop/print_api.py +24 -41
- atomicshop/process.py +63 -17
- atomicshop/process_poller/__init__.py +0 -0
- atomicshop/process_poller/pollers/__init__.py +0 -0
- atomicshop/process_poller/pollers/psutil_pywin32wmi_dll.py +95 -0
- atomicshop/process_poller/process_pool.py +207 -0
- atomicshop/process_poller/simple_process_pool.py +311 -0
- atomicshop/process_poller/tracer_base.py +45 -0
- atomicshop/process_poller/tracers/__init__.py +0 -0
- atomicshop/process_poller/tracers/event_log.py +46 -0
- atomicshop/process_poller/tracers/sysmon_etw.py +68 -0
- atomicshop/python_file_patcher.py +1 -1
- atomicshop/python_functions.py +27 -75
- atomicshop/question_answer_engine.py +2 -2
- atomicshop/scheduling.py +24 -5
- atomicshop/sound.py +4 -2
- atomicshop/speech_recognize.py +8 -0
- atomicshop/ssh_remote.py +158 -172
- atomicshop/startup/__init__.py +0 -0
- atomicshop/startup/win/__init__.py +0 -0
- atomicshop/startup/win/startup_folder.py +53 -0
- atomicshop/startup/win/task_scheduler.py +119 -0
- atomicshop/system_resource_monitor.py +61 -46
- atomicshop/system_resources.py +8 -8
- atomicshop/tempfiles.py +1 -2
- atomicshop/timer.py +30 -11
- atomicshop/urls.py +41 -0
- atomicshop/venvs.py +28 -0
- atomicshop/versioning.py +27 -0
- atomicshop/web.py +110 -25
- atomicshop/web_apis/__init__.py +0 -0
- atomicshop/web_apis/google_custom_search.py +44 -0
- atomicshop/web_apis/google_llm.py +188 -0
- atomicshop/websocket_parse.py +450 -0
- atomicshop/wrappers/certauthw/certauth.py +1 -0
- atomicshop/wrappers/cryptographyw.py +29 -8
- atomicshop/wrappers/ctyping/etw_winapi/__init__.py +0 -0
- atomicshop/wrappers/ctyping/etw_winapi/const.py +335 -0
- atomicshop/wrappers/ctyping/etw_winapi/etw_functions.py +393 -0
- atomicshop/wrappers/ctyping/file_details_winapi.py +67 -0
- atomicshop/wrappers/ctyping/msi_windows_installer/cabs.py +2 -1
- atomicshop/wrappers/ctyping/msi_windows_installer/extract_msi_main.py +13 -9
- atomicshop/wrappers/ctyping/msi_windows_installer/tables.py +35 -0
- atomicshop/wrappers/ctyping/setup_device.py +466 -0
- atomicshop/wrappers/ctyping/win_console.py +39 -0
- atomicshop/wrappers/dockerw/dockerw.py +113 -2
- atomicshop/wrappers/elasticsearchw/config_basic.py +0 -12
- atomicshop/wrappers/elasticsearchw/elastic_infra.py +75 -0
- atomicshop/wrappers/elasticsearchw/elasticsearchw.py +2 -20
- atomicshop/wrappers/factw/get_file_data.py +12 -5
- atomicshop/wrappers/factw/install/install_after_restart.py +89 -5
- atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py +20 -14
- atomicshop/wrappers/factw/postgresql/firmware.py +4 -6
- atomicshop/wrappers/githubw.py +583 -51
- atomicshop/wrappers/loggingw/consts.py +49 -0
- atomicshop/wrappers/loggingw/filters.py +102 -0
- atomicshop/wrappers/loggingw/formatters.py +58 -71
- atomicshop/wrappers/loggingw/handlers.py +459 -40
- atomicshop/wrappers/loggingw/loggers.py +19 -0
- atomicshop/wrappers/loggingw/loggingw.py +1010 -178
- atomicshop/wrappers/loggingw/reading.py +344 -19
- atomicshop/wrappers/mongodbw/__init__.py +0 -0
- atomicshop/wrappers/mongodbw/mongo_infra.py +31 -0
- atomicshop/wrappers/mongodbw/mongodbw.py +1432 -0
- atomicshop/wrappers/netshw.py +271 -0
- atomicshop/wrappers/playwrightw/engine.py +34 -19
- atomicshop/wrappers/playwrightw/infra.py +5 -0
- atomicshop/wrappers/playwrightw/javascript.py +7 -3
- atomicshop/wrappers/playwrightw/keyboard.py +14 -0
- atomicshop/wrappers/playwrightw/scenarios.py +172 -5
- atomicshop/wrappers/playwrightw/waits.py +9 -7
- atomicshop/wrappers/powershell_networking.py +80 -0
- atomicshop/wrappers/psutilw/processes.py +81 -0
- atomicshop/wrappers/psutilw/psutil_networks.py +85 -0
- atomicshop/wrappers/psutilw/psutilw.py +9 -0
- atomicshop/wrappers/pyopensslw.py +9 -2
- atomicshop/wrappers/pywin32w/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/cert_store.py +116 -0
- atomicshop/wrappers/pywin32w/console.py +34 -0
- atomicshop/wrappers/pywin32w/win_event_log/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/fetch.py +174 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribe.py +212 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_create.py +57 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_terminate.py +49 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/schannel_logging.py +97 -0
- atomicshop/wrappers/pywin32w/winshell.py +19 -0
- atomicshop/wrappers/pywin32w/wmis/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/wmis/msft_netipaddress.py +113 -0
- atomicshop/wrappers/pywin32w/wmis/win32_networkadapterconfiguration.py +259 -0
- atomicshop/wrappers/pywin32w/wmis/win32networkadapter.py +112 -0
- atomicshop/wrappers/pywin32w/wmis/wmi_helpers.py +236 -0
- atomicshop/wrappers/socketw/accepter.py +21 -7
- atomicshop/wrappers/socketw/certificator.py +216 -150
- atomicshop/wrappers/socketw/creator.py +190 -50
- atomicshop/wrappers/socketw/dns_server.py +500 -173
- atomicshop/wrappers/socketw/exception_wrapper.py +45 -52
- atomicshop/wrappers/socketw/process_getter.py +86 -0
- atomicshop/wrappers/socketw/receiver.py +144 -102
- atomicshop/wrappers/socketw/sender.py +65 -35
- atomicshop/wrappers/socketw/sni.py +334 -165
- atomicshop/wrappers/socketw/socket_base.py +134 -0
- atomicshop/wrappers/socketw/socket_client.py +137 -95
- atomicshop/wrappers/socketw/socket_server_tester.py +14 -9
- atomicshop/wrappers/socketw/socket_wrapper.py +717 -116
- atomicshop/wrappers/socketw/ssl_base.py +15 -14
- atomicshop/wrappers/socketw/statistics_csv.py +148 -17
- atomicshop/wrappers/sysmonw.py +157 -0
- atomicshop/wrappers/ubuntu_terminal.py +65 -26
- atomicshop/wrappers/win_auditw.py +189 -0
- atomicshop/wrappers/winregw/__init__.py +0 -0
- atomicshop/wrappers/winregw/winreg_installed_software.py +58 -0
- atomicshop/wrappers/winregw/winreg_network.py +232 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/METADATA +31 -49
- atomicshop-3.10.5.dist-info/RECORD +306 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/WHEEL +1 -1
- atomicshop/_basics_temp.py +0 -101
- atomicshop/addons/a_setup_scripts/install_psycopg2_ubuntu.sh +0 -3
- atomicshop/addons/a_setup_scripts/install_pywintrace_0.3.cmd +0 -2
- atomicshop/addons/mains/install_docker_rootless_ubuntu.py +0 -11
- atomicshop/addons/mains/install_docker_ubuntu_main_sudo.py +0 -11
- atomicshop/addons/mains/install_elastic_search_and_kibana_ubuntu.py +0 -10
- atomicshop/addons/mains/install_wsl_ubuntu_lts_admin.py +0 -9
- atomicshop/addons/package_setup/CreateWheel.cmd +0 -7
- atomicshop/addons/package_setup/Setup in Edit mode.cmd +0 -6
- atomicshop/addons/package_setup/Setup.cmd +0 -7
- atomicshop/addons/process_list/compile.cmd +0 -2
- atomicshop/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/archiver/_search_in_zip.py +0 -189
- atomicshop/archiver/archiver.py +0 -34
- atomicshop/archiver/search_in_archive.py +0 -250
- atomicshop/archiver/sevenz_app_w.py +0 -86
- atomicshop/archiver/sevenzs.py +0 -44
- atomicshop/archiver/zips.py +0 -293
- atomicshop/etw/dns_trace.py +0 -118
- atomicshop/etw/etw.py +0 -61
- atomicshop/file_types.py +0 -24
- atomicshop/mitm/engines/create_module_template_example.py +0 -13
- atomicshop/mitm/initialize_mitm_server.py +0 -240
- atomicshop/monitor/checks/hash.py +0 -44
- atomicshop/monitor/checks/hash_checks/file.py +0 -55
- atomicshop/monitor/checks/hash_checks/url.py +0 -62
- atomicshop/pbtkmultifile_argparse.py +0 -88
- atomicshop/permissions.py +0 -110
- atomicshop/process_poller.py +0 -237
- atomicshop/script_as_string_processor.py +0 -38
- atomicshop/ssh_scripts/process_from_ipv4.py +0 -37
- atomicshop/ssh_scripts/process_from_port.py +0 -27
- atomicshop/wrappers/_process_wrapper_curl.py +0 -27
- atomicshop/wrappers/_process_wrapper_tar.py +0 -21
- atomicshop/wrappers/dockerw/install_docker.py +0 -209
- atomicshop/wrappers/elasticsearchw/infrastructure.py +0 -265
- atomicshop/wrappers/elasticsearchw/install_elastic.py +0 -232
- atomicshop/wrappers/ffmpegw.py +0 -125
- atomicshop/wrappers/loggingw/checks.py +0 -20
- atomicshop/wrappers/nodejsw/install_nodejs.py +0 -139
- atomicshop/wrappers/process_wrapper_pbtk.py +0 -16
- atomicshop/wrappers/socketw/base.py +0 -59
- atomicshop/wrappers/socketw/get_process.py +0 -107
- atomicshop/wrappers/wslw.py +0 -191
- atomicshop-2.11.47.dist-info/RECORD +0 -251
- /atomicshop/{addons/mains → a_mains}/FACT/factw_fact_extractor_docker_image_main_sudo.py +0 -0
- /atomicshop/{addons → a_mains/addons}/PlayWrightCodegen.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/ScriptExecution.cmd +0 -0
- /atomicshop/{addons/mains → a_mains/addons}/inits/init_to_import_all_modules.py +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/ReadMe.txt +0 -0
- /atomicshop/{addons/mains → a_mains}/search_for_hyperlinks_in_docx.py +0 -0
- /atomicshop/{archiver → etws}/__init__.py +0 -0
- /atomicshop/{etw → etws/traces}/__init__.py +0 -0
- /atomicshop/{monitor/checks/hash_checks → mitm/statistic_analyzer_helper}/__init__.py +0 -0
- /atomicshop/{wrappers/nodejsw → permissions}/__init__.py +0 -0
- /atomicshop/wrappers/pywin32w/{wmi_win32process.py → wmis/win32process.py} +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info/licenses}/LICENSE.txt +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
import statistics
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Literal
|
|
4
|
+
import datetime
|
|
5
|
+
|
|
6
|
+
from ...print_api import print_api
|
|
7
|
+
from ...wrappers.loggingw import reading, consts
|
|
8
|
+
from ...file_io import csvs
|
|
9
|
+
from ... import urls, filesystem
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def calculate_moving_average(
        file_path: str = None,
        statistics_content: dict = None,
        by_type: Literal['host', 'url'] = 'url',
        moving_average_window_days: int = 5,
        top_bottom_deviation_percentage: float = 0.25,
        get_deviation_for_last_day_only: bool = False,
        get_deviation_for_date: str = None,
        skip_total_count_less_than: int = None,
        print_kwargs: dict = None
) -> list:
    """
    Run the moving average analysis over the daily statistics and return the deviations that were found.

    Exactly one data source must be supplied: 'file_path' or 'statistics_content'.
    Every day dictionary inside the statistics content is extended in place with the keys
    'content_no_useless', 'statistics_daily' and 'moving_average'.

    :param file_path: string, the path to the 'statistics.csv' file.
    :param statistics_content: dict, the statistics content dictionary, in the format returned by
        the 'get_all_files_content' function. Mutually exclusive with 'file_path'.
    :param by_type: string, the type to calculate the moving average by. Can be 'host' or 'url'.
    :param moving_average_window_days: integer, the window size for the moving average.
    :param top_bottom_deviation_percentage: float, the percentage of deviation from the moving average to the top or
        bottom.
    :param get_deviation_for_last_day_only: bool, check the 'get_all_files_content' function.
    :param get_deviation_for_date: str, check the 'get_all_files_content' function.
    :param skip_total_count_less_than: integer, if the total count is less than this number, skip the deviation.
    :param print_kwargs: dict, the print_api arguments.
    :return: list, the result of the 'find_deviation_from_moving_average' function.
    :raises ValueError: if neither or both of 'file_path' and 'statistics_content' are provided, or if both
        'get_deviation_for_last_day_only' and 'get_deviation_for_date' are set.
    """

    path_given: bool = bool(file_path)
    content_given: bool = bool(statistics_content)

    if not (path_given or content_given):
        raise ValueError('Either file_path or statistics_content must be provided.')
    if path_given and content_given:
        raise ValueError('Only one of file_path or statistics_content must be provided.')
    if get_deviation_for_last_day_only and get_deviation_for_date:
        raise ValueError('Only one of get_deviation_for_last_day_only or get_deviation_for_date can be set.')

    # No ready content was passed, so read it from the statistics files.
    if not content_given:
        statistics_content = get_all_files_content(
            file_path=file_path,
            moving_average_window_days=moving_average_window_days,
            get_deviation_for_last_day_only=get_deviation_for_last_day_only,
            get_deviation_for_date=get_deviation_for_date,
            print_kwargs=print_kwargs)

    # Per day: filter the raw rows first, then compute the daily statistics from the filtered rows.
    for single_day in statistics_content.values():
        single_day['content_no_useless'] = get_content_without_useless(single_day['content'])
        single_day['statistics_daily'] = compute_statistics_from_content(
            single_day['content_no_useless'], by_type)

    averages_by_day: dict = compute_moving_averages_from_average_statistics(
        statistics_content, moving_average_window_days)

    # Attach the moving average to each day; days without one get an empty dictionary.
    for day_key, single_day in statistics_content.items():
        single_day['moving_average'] = averages_by_day[day_key] if day_key in averages_by_day else {}

    # Find deviation from the moving average to the bottom or top by specified percentage.
    return find_deviation_from_moving_average(
        statistics_content, top_bottom_deviation_percentage, skip_total_count_less_than)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_all_files_content(
        file_path: str,
        moving_average_window_days: int,
        get_deviation_for_last_day_only: bool = False,
        get_deviation_for_date: str = None,
        print_kwargs: dict = None
) -> dict:
    """
    Collect the statistics log file and its rotations into a dictionary, one entry per file, to prepare for the
    moving average analysis.

    :param file_path: string, the path to the 'statistics.csv' file.
    :param moving_average_window_days: integer, the window size for the moving average.
    :param get_deviation_for_last_day_only: bool, if True, only the last
        'moving_average_window_days + 1' files are read.
        Example: With 'moving_average_window_days=5', the last 6 days will be analyzed.
        5 days for moving average and the last day for deviation.
        File names example where the last day is 2021-01-06:
            statistics_2021-01-01.csv
            statistics_2021-01-02.csv
            statistics_2021-01-03.csv
            statistics_2021-01-04.csv
            statistics_2021-01-05.csv
            statistics_2021-01-06.csv
    :param get_deviation_for_date: str, if set, the file with this date is treated as the last day: it is read
        together with up to 'moving_average_window_days' files that precede it.
        The format should be the same as in the file names, e.g. 'YYYY-MM-DD'.
    :param print_kwargs: dict, the print_api arguments, forwarded to the CSV reader.
    :return: dict, keyed by the date string of each file. Each value is a dictionary with the keys
        'file' (the log path object), 'content' (the rows read from the CSV) and 'header' (the CSV header).
    :raises ValueError: if both 'get_deviation_for_last_day_only' and 'get_deviation_for_date' are set,
        if 'get_deviation_for_date' does not match the date format, or if no file has that date.
    """

    if get_deviation_for_last_day_only and get_deviation_for_date:
        raise ValueError('Only one of get_deviation_for_last_day_only or get_deviation_for_date can be set.')

    date_format: str = consts.DEFAULT_ROTATING_SUFFIXES_FROM_WHEN['midnight']

    # Get all the file paths and their midnight rotations.
    logs_paths: list[filesystem.AtomicPath] = reading.get_logs_paths(
        log_file_path=file_path, date_format=date_format)

    if get_deviation_for_last_day_only:
        # The window itself plus the one day that is checked for deviation.
        logs_paths = logs_paths[-(moving_average_window_days + 1):]

    if get_deviation_for_date:
        # Check if the date format is correct.
        try:
            datetime.datetime.strptime(get_deviation_for_date, date_format)
        except ValueError:
            date_is_valid: bool = False
        else:
            date_is_valid: bool = True

        if not date_is_valid:
            raise ValueError(f'Date [{get_deviation_for_date}] is not in the correct format: {date_format}')

        # Position of the first file whose date equals the requested one.
        date_index: int | None = next(
            (position for position, candidate in enumerate(logs_paths)
             if candidate.datetime_string == get_deviation_for_date),
            None)

        if date_index is None:
            raise ValueError(f'Date {get_deviation_for_date} not found in the log files.')

        first_index: int = max(0, date_index - moving_average_window_days)
        logs_paths = logs_paths[first_index:date_index + 1]

    # Read each file to its day.
    statistics_content: dict = {}
    for log_atomic_path in logs_paths:
        date_string: str = log_atomic_path.datetime_string
        rows, header = csvs.read_csv_to_list_of_dicts_by_header(
            log_atomic_path.path, **(print_kwargs or {}))
        statistics_content[date_string] = {
            'file': log_atomic_path,
            'content': rows,
            'header': header
        }

    return statistics_content
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def get_content_without_useless(content: list) -> list:
    """
    Return the rows of the 'statistics.csv' content that should take part in the analysis.

    A row is skipped (empty lines, headers and errors) when:
        * its 'host' value is the literal string 'host', or
        * both its 'request_size_bytes' and 'response_size_bytes' values are empty strings.

    :param content: list, the content list of row dictionaries. Each row must contain the 'host' key; rows that
        are not header rows must also contain 'request_size_bytes' and 'response_size_bytes'.
    :return: list, a new list with the remaining rows in their original order. The row dictionaries themselves
        are not copied.
    """

    return [
        line for line in content
        if not (
            line['host'] == 'host'
            or (line['request_size_bytes'] == '' and line['response_size_bytes'] == '')
        )
    ]
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def get_data_dict_from_statistics_content(
        content: list,
        by_type: Literal['host', 'url']
) -> dict:
    """
    Group the request and response sizes of the 'statistics.csv' rows by host or by URL.

    :param content: list, the content list of row dictionaries. Each row is read through the keys 'host',
        'request_size_bytes' and 'response_size_bytes', plus 'path' when 'by_type' is 'url'.
    :param by_type: string, the type to calculate the moving average by. Can be 'host' or 'url'.
        'host': the grouping key is the row's 'host' value.
        'url': 'host' and 'path' are concatenated and parsed with 'urls.url_parser'. If the parsed 'file' has
            a '.gz', '.gzip' or '.zip' suffix, the key is the parsed 'directories' without the last element,
            joined with '/'. Otherwise, the key is the parsed 'path'. A trailing '/' is removed from the key.
    :return: dict, the data dictionary: {key: {'request_sizes': [int, ...], 'response_sizes': [int, ...]}}.
        Empty size strings are not added to the lists.
    :raises ValueError: if 'by_type' is not 'host' or 'url' (also for empty 'content'), or if a non-empty size
        value cannot be converted to an integer (the offending row is printed first).
    """

    # Reject an unsupported 'by_type' up front, so the error does not depend on 'content' being non-empty.
    if by_type not in ('host', 'url'):
        raise ValueError(f'Invalid by_type: {by_type}')

    hosts_requests_responses: dict = {}
    for line in content:
        if by_type == 'host':
            type_to_check: str = line['host']
        else:
            # Combine host and path to URL and remove the parameters from the URL.
            url_parsed = urls.url_parser(line['host'] + line['path'])

            if url_parsed['file'] and Path(url_parsed['file']).suffix in ('.gz', '.gzip', '.zip'):
                type_to_check = '/'.join(url_parsed['directories'][:-1])
            else:
                type_to_check = url_parsed['path']

            # Remove the last slash from the URL.
            type_to_check = type_to_check.removesuffix('/')

        # If the key is not in the dictionary yet, add it with empty size lists.
        sizes: dict = hosts_requests_responses.setdefault(
            type_to_check, {'request_sizes': [], 'response_sizes': []})

        request_size_bytes = line['request_size_bytes']
        response_size_bytes = line['response_size_bytes']

        # Append the sizes.
        try:
            if request_size_bytes != '':
                sizes['request_sizes'].append(int(request_size_bytes))
            if response_size_bytes != '':
                sizes['response_sizes'].append(int(response_size_bytes))
        except ValueError:
            # Show the row that could not be parsed, then let the original error propagate.
            print_api(line, color='yellow')
            raise

    return hosts_requests_responses
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def compute_statistics_from_data_dict(data_dict: dict) -> None:
    """
    Compute the per-key statistics and store them inside the data dictionary itself.

    For each value of 'data_dict' the following keys are added (or overwritten):
        'count_requests', 'count_responses',
        'avg_request_size', 'median_request_size',
        'avg_response_size', 'median_response_size'.
    The average and the median of an empty size list are stored as 0.

    :param data_dict: dict, the data dictionary: {key: {'request_sizes': [...], 'response_sizes': [...]}}.
        It is modified in place.
    :return: None, the results are written into 'data_dict'.
    """

    for host_dict in data_dict.values():
        request_sizes = host_dict['request_sizes']
        response_sizes = host_dict['response_sizes']
        count_requests = len(request_sizes)
        count_responses = len(response_sizes)

        # 'statistics.mean' and 'statistics.median' raise 'StatisticsError' on empty data,
        # so an empty list is reported as 0 instead.
        avg_request_size = statistics.mean(request_sizes) if count_requests > 0 else 0
        median_request_size = statistics.median(request_sizes) if count_requests > 0 else 0
        avg_response_size = statistics.mean(response_sizes) if count_responses > 0 else 0
        median_response_size = statistics.median(response_sizes) if count_responses > 0 else 0

        # Store the results only after all of them were computed successfully.
        host_dict.update({
            'count_requests': count_requests,
            'count_responses': count_responses,
            'avg_request_size': avg_request_size,
            'median_request_size': median_request_size,
            'avg_response_size': avg_response_size,
            'median_response_size': median_response_size
        })
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def compute_statistics_from_content(
        content: list,
        by_type: Literal['host', 'url']
):
    """
    Build the statistics dictionary from the 'statistics.csv' file content.

    The rows are grouped with 'get_data_dict_from_statistics_content', and the resulting dictionary is
    then passed to 'compute_statistics_from_data_dict'.

    :param content: list, the content list.
    :param by_type: string, the type to calculate the moving average by. Can be 'host' or 'url'.
    :return: dict, the statistics dictionary.
    """

    grouped_sizes: dict = get_data_dict_from_statistics_content(content, by_type)

    # The same dictionary object is handed over for the statistics step and returned afterwards.
    compute_statistics_from_data_dict(grouped_sizes)
    return grouped_sizes
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def compute_moving_averages_from_average_statistics(
        average_statistics_dict: dict,
        moving_average_window_days: int
):
    """
    Compute the moving averages for every day that has a full window of days available.

    The days are taken in the iteration order of 'average_statistics_dict'. For each day, the window consists of
    the last 'moving_average_window_days' day dictionaries, including the day itself. Days that come before the
    first full window are skipped and do not appear in the result.

    :param average_statistics_dict: dict, the average statistics dictionary: {day: day dictionary}.
    :param moving_average_window_days: integer, the window size for the moving average.
    :return: dict, the moving averages' dictionary: {day: result of
        'compute_average_for_current_day_from_past_x_days' for the window of that day}.
    """

    # Materialize the day dictionaries once, instead of rebuilding the list for every day.
    all_days_content_list: list = list(average_statistics_dict.values())

    moving_average: dict = {}
    for current_day, day in enumerate(average_statistics_dict, start=1):
        # Not enough days yet to fill the window.
        if current_day < moving_average_window_days:
            continue

        # The last 'moving_average_window_days' days, including the current day.
        last_x_window_days_content_list: list = (
            all_days_content_list[current_day - moving_average_window_days:current_day])

        moving_average[day] = compute_average_for_current_day_from_past_x_days(
            last_x_window_days_content_list)

    return moving_average
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def compute_average_for_current_day_from_past_x_days(
        previous_days_content_list: list
) -> dict:
    """
    Aggregate the daily statistics of the given days into moving averages and moving medians.

    :param previous_days_content_list: list, the day dictionaries of the window. Each must have the
        'statistics_daily' key: {key: daily statistics dictionary}.
    :return: dict, {key: result dictionary}. Each result dictionary contains:
        'ma_*': the mean of the daily counts / daily average sizes,
        'mm_*': the median of the daily counts / daily median sizes,
        and the collected daily value lists the results were computed from.
        A key is aggregated only over the days in which it appears.
    """

    # (name of the collected list, key in the daily statistics, conversion), in the order of collection.
    collected_layout: tuple = (
        ('all_request_counts', 'count_requests', int),
        ('all_response_counts', 'count_responses', int),
        ('avg_request_sizes', 'avg_request_size', float),
        ('avg_response_sizes', 'avg_response_size', float),
        ('median_request_sizes', 'median_request_size', float),
        ('median_response_sizes', 'median_response_size', float),
    )

    # Gather the daily values per key.
    collected: dict = {}
    for day_entry in previous_days_content_list:
        for key_name, daily_values in day_entry['statistics_daily'].items():
            value_lists = collected.get(key_name)
            if value_lists is None:
                value_lists = {list_name: [] for list_name, _, _ in collected_layout}
                collected[key_name] = value_lists

            for list_name, daily_key, convert in collected_layout:
                value_lists[list_name].append(convert(daily_values[daily_key]))

    # Compute the moving average.
    results: dict = {}
    for key_name, value_lists in collected.items():
        request_counts = value_lists['all_request_counts']
        response_counts = value_lists['all_response_counts']
        avg_request_sizes = value_lists['avg_request_sizes']
        avg_response_sizes = value_lists['avg_response_sizes']
        median_request_sizes = value_lists['median_request_sizes']
        median_response_sizes = value_lists['median_response_sizes']

        results[key_name] = {
            'ma_request_count': statistics.mean(request_counts),
            'ma_response_count': statistics.mean(response_counts),
            'ma_request_size': statistics.mean(avg_request_sizes),
            'ma_response_size': statistics.mean(avg_response_sizes),
            'mm_request_count': statistics.median(request_counts),
            'mm_response_count': statistics.median(response_counts),
            'mm_request_size': statistics.median(median_request_sizes),
            'mm_response_size': statistics.median(median_response_sizes),
            'all_request_counts': request_counts,
            'all_response_counts': response_counts,
            'avg_request_sizes': avg_request_sizes,
            'avg_response_sizes': avg_response_sizes,
            'median_request_sizes': median_request_sizes,
            'median_response_sizes': median_response_sizes
        }

    return results
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def find_deviation_from_moving_average(
        statistics_content: dict,
        top_bottom_deviation_percentage: float,
        skip_total_count_less_than: int = None
) -> list:
    """
    Find the hosts whose daily statistics deviate from the previous day's moving average by more than the
    specified fraction, either above or below.

    For every day except the first, each host in the day's 'statistics_daily' is compared against the
    'moving_average' entry of the previous day. Four checks are done per host: request count, response count,
    average request size and average response size. A host that has no entry in the previous day's moving
    averages is reported with the deviation type 'clear'.

    :param statistics_content: dict, the statistics content dictionary. Each value is a day dictionary with the
        'statistics_daily' key (host -> daily statistics) and optionally the 'moving_average' key
        (host -> moving averages). Days are processed in the dictionary's iteration order.
    :param top_bottom_deviation_percentage: float, the allowed deviation from the moving average to the top or
        bottom. It is used as a multiplier of the moving average (moving average * this value).
    :param skip_total_count_less_than: integer, if the number of requests/responses behind the checked value is
        less than this number, the deviation is not reported. None (or 0) disables the filter.
    :return: list of dicts, one dictionary per found deviation.
    """

    deviation_list: list = []

    def _check_deviation(
            check: Literal['count', 'avg'],
            traffic_direction: Literal['request', 'response'],
            day_statistics_content_dict: dict,
            moving_averages_dict: dict,
            day,
            host
    ):
        """
        Check one metric of one host against its moving average and append a record to 'deviation_list'
        if the value is outside the allowed range.
        """

        if check == 'count':
            check_type = f'{check}_{traffic_direction}s'
            ma_check_type = f'ma_{traffic_direction}_{check}'
            # There is no separate median for the count checks.
            median_type_string = None
            moving_median_type_string = f'mm_{traffic_direction}_{check}'
        elif check == 'avg':
            check_type = f'{check}_{traffic_direction}_size'
            ma_check_type = f'ma_{traffic_direction}_size'
            median_type_string = f'median_{traffic_direction}_size'
            moving_median_type_string = f'mm_{traffic_direction}_size'
        else:
            raise ValueError(f'Invalid check: {check}')

        host_moving_average_by_type = moving_averages_dict[host][ma_check_type]
        check_type_moving_by_percent = host_moving_average_by_type * top_bottom_deviation_percentage
        check_type_moving_above = host_moving_average_by_type + check_type_moving_by_percent
        check_type_moving_below = host_moving_average_by_type - check_type_moving_by_percent

        value = day_statistics_content_dict[check_type]

        if value > check_type_moving_above:
            deviation_type = 'above'
            difference = value - host_moving_average_by_type
        elif value < check_type_moving_below:
            deviation_type = 'below'
            difference = host_moving_average_by_type - value
        else:
            # The value is inside the allowed range, nothing to report.
            return

        deviation_percentage = None
        error_message: str = str()
        try:
            deviation_percentage = difference / host_moving_average_by_type
        except ZeroDivisionError:
            # A moving average of 0 has no relative deviation; the record is still reported with the error.
            error_message = f' | Error: Division by 0, host_moving_average_by_type: {host_moving_average_by_type}'

        message = f'[{check_type}] is [{deviation_type}] the moving average.' + error_message

        # The median and the total count are None for the count checks, since the value itself is the count.
        if check == 'count':
            total_entries_averaged = None
            median_size = None
            entries_count = value
        else:
            total_entries_averaged = day_statistics_content_dict[f'count_{traffic_direction}s']
            median_size = day_statistics_content_dict[median_type_string]
            entries_count = total_entries_averaged

        # If the number of entries is less than the specified number, skip the deviation.
        # The entry count is always what gets compared, also when that count is 0.
        if skip_total_count_less_than and entries_count < skip_total_count_less_than:
            return

        moving_median_size = moving_averages_dict[host][moving_median_type_string]

        deviation_list.append({
            'day': day,
            'host': host,
            'message': message,
            'value': value,
            'ma_value': host_moving_average_by_type,
            'check_type': check_type,
            'percentage': top_bottom_deviation_percentage,
            # NOTE(review): this is always the upper threshold, even for 'below' deviations - confirm intended.
            'ma_value_checked': check_type_moving_above,
            'deviation_percentage': deviation_percentage,
            'total_entries_averaged': total_entries_averaged,
            'deviation_type': deviation_type,
            'median_size': median_size,
            'mm_size': moving_median_size,
            'data': day_statistics_content_dict,
            'ma_data': moving_averages_dict[host]
        })

    # Keep a reference to the previous day instead of rebuilding a list of all the days on each iteration.
    previous_day_dict = None
    for day, day_dict in statistics_content.items():
        # If it's the first day, there is no previous day moving average.
        if previous_day_dict is None:
            previous_day_moving_average_dict = {}
        else:
            previous_day_moving_average_dict = previous_day_dict.get('moving_average', {})
        previous_day_dict = day_dict

        # If there is no moving average for previous day continue to the next day.
        if not previous_day_moving_average_dict:
            continue

        for host, host_dict in day_dict['statistics_daily'].items():
            # If the host is not in the previous day's moving averages, there is nothing to compare it to,
            # so it is reported as a 'clear' deviation.
            if host not in previous_day_moving_average_dict:
                deviation_list.append({
                    'day': day,
                    'host': host,
                    'message': f'Host not in the moving averages: {host}',
                    'value': None,
                    'ma_value': None,
                    'check_type': None,
                    'percentage': None,
                    'ma_value_checked': None,
                    'deviation_percentage': None,
                    'total_entries_averaged': None,
                    'deviation_type': 'clear',
                    'median_size': None,
                    'mm_size': None,
                    'data': host_dict,
                    'ma_data': previous_day_moving_average_dict
                })
                continue

            for check, traffic_direction in (
                    ('count', 'request'), ('count', 'response'), ('avg', 'request'), ('avg', 'response')):
                _check_deviation(
                    check, traffic_direction, host_dict, previous_day_moving_average_dict, day, host)

    return deviation_list
|