atomicshop 2.12.25__py3-none-any.whl → 2.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/basics/dicts.py +12 -0
- atomicshop/basics/package_module.py +10 -0
- atomicshop/diff_check.py +5 -4
- atomicshop/dns.py +0 -9
- atomicshop/etws/const.py +38 -0
- atomicshop/etws/traces/__init__.py +0 -0
- atomicshop/{etw → etws/traces}/trace_dns.py +66 -23
- atomicshop/etws/traces/trace_sysmon_process_creation.py +116 -0
- atomicshop/monitor/change_monitor.py +5 -1
- atomicshop/monitor/checks/dns.py +11 -11
- atomicshop/process.py +2 -2
- atomicshop/process_poller.py +134 -67
- atomicshop/wrappers/ctyping/etw_winapi/const.py +81 -0
- atomicshop/wrappers/loggingw/reading.py +115 -98
- atomicshop/wrappers/psutilw/psutilw.py +9 -0
- atomicshop/wrappers/sysmonw.py +153 -0
- {atomicshop-2.12.25.dist-info → atomicshop-2.13.0.dist-info}/METADATA +1 -1
- {atomicshop-2.12.25.dist-info → atomicshop-2.13.0.dist-info}/RECORD +26 -21
- /atomicshop/{etw → etws}/__init__.py +0 -0
- /atomicshop/{etw → etws}/providers.py +0 -0
- /atomicshop/{etw → etws}/sessions.py +0 -0
- /atomicshop/{etw → etws}/trace.py +0 -0
- {atomicshop-2.12.25.dist-info → atomicshop-2.13.0.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.12.25.dist-info → atomicshop-2.13.0.dist-info}/WHEEL +0 -0
- {atomicshop-2.12.25.dist-info → atomicshop-2.13.0.dist-info}/top_level.txt +0 -0
atomicshop/process_poller.py
CHANGED
|
@@ -5,8 +5,10 @@ from typing import Literal, Union
|
|
|
5
5
|
|
|
6
6
|
from .wrappers.pywin32w import wmi_win32process
|
|
7
7
|
from .wrappers.psutilw import psutilw
|
|
8
|
+
from .etws.traces import trace_sysmon_process_creation
|
|
8
9
|
from .basics import list_of_dicts, dicts
|
|
9
10
|
from .process_name_cmd import ProcessNameCmdline
|
|
11
|
+
from .print_api import print_api
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
def get_process_time_tester(
|
|
@@ -40,16 +42,24 @@ test = get_process_list.get_processes()
|
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
class GetProcessList:
|
|
45
|
+
"""
|
|
46
|
+
The class is responsible for getting the list of running processes.
|
|
47
|
+
|
|
48
|
+
Example of one time polling with 'pywin32' method:
|
|
49
|
+
from atomicshop import process_poller
|
|
50
|
+
process_list: dict = \
|
|
51
|
+
process_poller.GetProcessList(get_method='pywin32', connect_on_init=True).get_processes(as_dict=True)
|
|
52
|
+
"""
|
|
43
53
|
def __init__(
|
|
44
54
|
self,
|
|
45
|
-
get_method: Literal['psutil', 'pywin32', 'process_dll'] = 'process_dll',
|
|
55
|
+
get_method: Literal['psutil', 'pywin32', 'process_dll', 'sysmon_etw'] = 'process_dll',
|
|
46
56
|
connect_on_init: bool = False
|
|
47
57
|
):
|
|
48
58
|
"""
|
|
49
59
|
:param get_method: str, The method to get the list of processes. Default is 'process_dll'.
|
|
50
60
|
'psutil': Get the list of processes by 'psutil' library. Resource intensive and slow.
|
|
51
61
|
'pywin32': Get the list of processes by 'pywin32' library, using WMI. Not resource intensive, but slow.
|
|
52
|
-
'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64
|
|
62
|
+
'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64.
|
|
53
63
|
:param connect_on_init: bool, if True, will connect to the service on init. 'psutil' doesn't need to connect.
|
|
54
64
|
"""
|
|
55
65
|
self.get_method = get_method
|
|
@@ -86,7 +96,7 @@ class GetProcessList:
|
|
|
86
96
|
"""
|
|
87
97
|
The function will get the list of opened processes and return it as a list of dicts.
|
|
88
98
|
|
|
89
|
-
:return: list of dicts, of opened processes.
|
|
99
|
+
:return: dict keyed by pid, or list of dicts, of opened processes (depending on the 'as_dict' setting).
|
|
90
100
|
"""
|
|
91
101
|
|
|
92
102
|
if as_dict:
|
|
@@ -132,19 +142,14 @@ class ProcessPollerPool:
|
|
|
132
142
|
Later, I'll find a solution to make it more efficient.
|
|
133
143
|
"""
|
|
134
144
|
def __init__(
|
|
135
|
-
self,
|
|
145
|
+
self,
|
|
136
146
|
interval_seconds: Union[int, float] = 0,
|
|
137
147
|
operation: Literal['thread', 'process'] = 'thread',
|
|
138
|
-
poller_method: Literal['psutil', 'pywin32', 'process_dll'] = '
|
|
148
|
+
poller_method: Literal['psutil', 'pywin32', 'process_dll', 'sysmon_etw'] = 'sysmon_etw',
|
|
149
|
+
sysmon_etw_session_name: str = None,
|
|
150
|
+
sysmon_directory: str = None
|
|
139
151
|
):
|
|
140
152
|
"""
|
|
141
|
-
:param store_cycles: int, how many cycles to store. Each cycle is polling processes.
|
|
142
|
-
Example: Specifying 3 will store last 3 polled cycles of processes.
|
|
143
|
-
|
|
144
|
-
Default is 200, which means that 200 latest cycles original PIDs and their process names will be stored.
|
|
145
|
-
|
|
146
|
-
You can execute the 'get_process_time_tester' function in order to find the optimal number of cycles
|
|
147
|
-
and how much time it will take.
|
|
148
153
|
:param interval_seconds: float, how many seconds to wait between each cycle.
|
|
149
154
|
Default is 0, which means that the polling will be as fast as possible.
|
|
150
155
|
|
|
@@ -162,6 +167,18 @@ class ProcessPollerPool:
|
|
|
162
167
|
'psutil': Get the list of processes by 'psutil' library. Resource intensive and slow.
|
|
163
168
|
'pywin32': Get the list of processes by 'pywin32' library, using WMI. Not resource intensive, but slow.
|
|
164
169
|
'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64.
|
|
170
|
+
'sysmon_etw': Get the list of processes with running SysMon by ETW - Event Tracing for Windows.
|
|
171
|
+
In this case 'interval_seconds' is irrelevant, since the ETW is real-time.
|
|
172
|
+
Steps we take:
|
|
173
|
+
1. Check if SysMon is Running. If not, check if the executable exists in specified
|
|
174
|
+
location and start it as a service.
|
|
175
|
+
2. Start the "Microsoft-Windows-Sysmon" ETW session.
|
|
176
|
+
3. Take a snapshot of current processes and their CMDs with pywin32 (WMI) and store it in a dict.
|
|
177
|
+
4. Each new process creation from ETW updates the dict.
|
|
178
|
+
:param sysmon_etw_session_name: str, only for 'sysmon_etw' get_method.
|
|
179
|
+
The name of the ETW session for tracing process creation.
|
|
180
|
+
:param sysmon_directory: str, only for 'sysmon_etw' get_method.
|
|
181
|
+
The directory where the SysMon executable is located. If it does not exist, it will be downloaded.
|
|
165
182
|
---------------------------------------------
|
|
166
183
|
If there is an exception, ProcessPollerPool.processes will be set to the exception.
|
|
167
184
|
While getting the processes you can use this to execute the exception:
|
|
@@ -172,20 +189,20 @@ class ProcessPollerPool:
|
|
|
172
189
|
raise processes
|
|
173
190
|
"""
|
|
174
191
|
|
|
175
|
-
self.store_cycles: int = store_cycles
|
|
176
192
|
self.interval_seconds: float = interval_seconds
|
|
177
193
|
self.operation: str = operation
|
|
178
194
|
self.poller_method = poller_method
|
|
179
|
-
|
|
180
|
-
self.
|
|
195
|
+
self.sysmon_etw_session_name: str = sysmon_etw_session_name
|
|
196
|
+
self.sysmon_directory: str = sysmon_directory
|
|
181
197
|
|
|
182
198
|
# Current process pool.
|
|
183
|
-
self.
|
|
199
|
+
self._processes: dict = dict()
|
|
184
200
|
|
|
185
201
|
# The variable is responsible to stop the thread if it is running.
|
|
186
|
-
self.
|
|
202
|
+
self._running: bool = False
|
|
187
203
|
|
|
188
|
-
self.
|
|
204
|
+
self._process_queue = multiprocessing.Queue()
|
|
205
|
+
self._running_state_queue = multiprocessing.Queue()
|
|
189
206
|
|
|
190
207
|
def start(self):
|
|
191
208
|
if self.operation == 'thread':
|
|
@@ -195,66 +212,116 @@ class ProcessPollerPool:
|
|
|
195
212
|
else:
|
|
196
213
|
raise ValueError(f'Invalid operation type [{self.operation}]')
|
|
197
214
|
|
|
198
|
-
|
|
199
|
-
self.running = False
|
|
200
|
-
|
|
201
|
-
def _start_thread(self):
|
|
202
|
-
self.running = True
|
|
203
|
-
# threading.Thread(target=self._worker, args=(self.process_polling_instance,)).start()
|
|
204
|
-
thread = threading.Thread(target=self._worker)
|
|
215
|
+
thread = threading.Thread(target=self._thread_get_queue)
|
|
205
216
|
thread.daemon = True
|
|
206
217
|
thread.start()
|
|
207
218
|
|
|
208
|
-
def
|
|
209
|
-
self.
|
|
210
|
-
|
|
219
|
+
def stop(self):
    """
    Ask the polling worker to stop.

    Clears the local running flag and sends 'False' through the running state queue, which the worker reads
    to leave its polling loop.
    """
    self._running = False

    # The worker may live in another process, so the stop request travels through the queue.
    stop_request: bool = False
    self._running_state_queue.put(stop_request)
|
|
211
222
|
|
|
212
|
-
|
|
223
|
+
def get_processes(self):
    """
    Return the latest value received from the worker.

    :return: the object currently stored in 'self._processes' (the same object, not a copy).
    """
    latest_snapshot = self._processes
    return latest_snapshot
|
|
225
|
+
|
|
226
|
+
def _start_thread(self):
    """
    Run the module level '_worker' in a daemon thread of the current process.
    """
    self._running = True

    # Positional arguments in the order expected by '_worker'.
    worker_args: tuple = (
        self.poller_method,
        self._running_state_queue,
        self.interval_seconds,
        self._process_queue,
        self.sysmon_etw_session_name,
        self.sysmon_directory,
    )

    worker_thread = threading.Thread(target=_worker, args=worker_args, daemon=True)
    worker_thread.start()
|
|
215
237
|
|
|
216
|
-
def
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
try:
|
|
224
|
-
# If the list is full (to specified 'store_cycles'), remove the first element.
|
|
225
|
-
if len(list_of_processes) == self.store_cycles:
|
|
226
|
-
del list_of_processes[0]
|
|
227
|
-
|
|
228
|
-
# Get the current processes and reinitialize the instance of the dict.
|
|
229
|
-
current_processes: dict = dict(self.get_processes_list.get_processes())
|
|
238
|
+
def _start_process(self):
    """
    Run the module level '_worker' in a separate process.
    """
    self._running = True

    # Positional arguments in the order expected by '_worker'.
    worker_args: tuple = (
        self.poller_method,
        self._running_state_queue,
        self.interval_seconds,
        self._process_queue,
        self.sysmon_etw_session_name,
        self.sysmon_directory,
    )

    worker_process = multiprocessing.Process(target=_worker, args=worker_args)
    worker_process.start()
|
|
230
245
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
current_processes[pid]['cmdline'] = str()
|
|
235
|
-
elif process_info['cmdline'] == 'Error':
|
|
236
|
-
current_processes[pid]['cmdline'] = str()
|
|
246
|
+
def _thread_get_queue(self):
    """
    Keep 'self._processes' in sync with the worker.

    Blocks on the process queue forever and stores every received item as the current processes value.
    """
    while True:
        received = self._process_queue.get()
        self._processes = received
|
|
237
249
|
|
|
238
|
-
# Append the current processes to the list.
|
|
239
|
-
list_of_processes.append(current_processes)
|
|
240
250
|
|
|
241
|
-
|
|
242
|
-
|
|
251
|
+
def _worker(
        poller_method, running_state_queue, interval_seconds, process_queue, sysmon_etw_session_name, sysmon_directory):
    """
    Collect processes in a loop and publish the accumulated result through 'process_queue'.

    The function is the target of the thread / process started by 'ProcessPollerPool'.

    :param poller_method: str, 'sysmon_etw' reads process creation events from 'SysmonProcessCreationTrace'.
        Any other value is passed to 'GetProcessList' as its 'get_method'.
    :param running_state_queue: queue, the first value received from it replaces the running state of the loop.
        Putting 'False' stops the loop.
    :param interval_seconds: int or float, seconds to sleep between the cycles. Not used with 'sysmon_etw'.
    :param process_queue: queue, receives the accumulated processes dict after each cycle. If a cycle raised,
        the exception object is put to the queue as the last item.
    :param sysmon_etw_session_name: str, passed as 'session_name' to 'SysmonProcessCreationTrace'.
    :param sysmon_directory: str, passed as 'sysmon_directory' to 'SysmonProcessCreationTrace'.
    """

    def _worker_to_get_running_state():
        # Blocks until the owner sends the new state, then updates the flag that the main loop checks.
        nonlocal running_state
        running_state = running_state_queue.get()

    running_state: bool = True

    thread = threading.Thread(target=_worker_to_get_running_state)
    thread.daemon = True
    thread.start()

    if poller_method == 'sysmon_etw':
        poller_instance = trace_sysmon_process_creation.SysmonProcessCreationTrace(
            attrs=['pid', 'original_file_name', 'command_line'],
            session_name=sysmon_etw_session_name,
            close_existing_session_name=True,
            sysmon_directory=sysmon_directory
        )

        # We must initiate the connection inside the thread/process, because it is not thread-safe.
        poller_instance.start()

        # ETW reports only new process creations, so the already running processes are taken once as a baseline.
        processes = GetProcessList(get_method='pywin32', connect_on_init=True).get_processes(as_dict=True)
        process_queue.put(processes)
    else:
        poller_instance = GetProcessList(get_method=poller_method)
        poller_instance.connect()
        processes = {}

    exception = None
    while running_state:
        try:
            if poller_method == 'sysmon_etw':
                # Each emitted event describes one created process.
                current_cycle: dict = poller_instance.emit()
                current_processes: dict = {int(current_cycle['pid']): {
                    'name': current_cycle['original_file_name'],
                    'cmdline': current_cycle['command_line']}
                }
            else:
                # Get the current processes and reinitialize the instance of the dict.
                current_processes: dict = dict(poller_instance.get_processes())

            # Remove command lines that contain only numbers or the 'Error' marker, since they are useless.
            for process_info in current_processes.values():
                if process_info['cmdline'].isnumeric() or process_info['cmdline'] == 'Error':
                    process_info['cmdline'] = str()

            # This loop is essential for keeping the command lines.
            # When the process unloads from memory, the last polling will have only pid and executable name,
            # but not the command line. The command line from the previous polling is kept if this happens.
            for pid, process_info in current_processes.items():
                previous_info = processes.get(pid)
                if previous_info is None:
                    continue

                if previous_info['name'] == process_info['name'] and process_info['cmdline'] == '':
                    process_info['cmdline'] = previous_info['cmdline']

            processes.update(current_processes)

            process_queue.put(processes)

            # Since ETW is a blocking operation, we don't need to sleep.
            if poller_method != 'sysmon_etw':
                time.sleep(interval_seconds)
        except KeyboardInterrupt as e:
            running_state = False
            exception = e
        except Exception as e:
            running_state = False
            exception = e
            print_api(f'Exception in ProcessPollerPool: {e}', color='red')

    # Publish only a real failure. After a regular stop 'exception' is still None, and putting it to the queue
    # would replace the last processes snapshot on the receiving side with None.
    if exception is not None:
        process_queue.put(exception)
|
|
@@ -10,6 +10,16 @@ WNODE_FLAG_TRACED_GUID = 0x00020000
|
|
|
10
10
|
MAXIMUM_LOGGERS = 64
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
"""
|
|
14
|
+
wintypes.DWORD = wintypes.ULONG = ctypes.c_ulong: 32-bit unsigned integer
|
|
15
|
+
wintypes.WORD = wintypes.USHORT = ctypes.c_ushort: 16-bit unsigned integer
|
|
16
|
+
wintypes.BYTE = ctypes.c_ubyte: 8-bit unsigned integer
|
|
17
|
+
wintypes.LARGE_INTEGER = ctypes.c_longlong: 64-bit signed integer (ctypes exposes the C union as a plain
|
|
18
|
+
integer, not as a structure).
|
|
19
|
+
ctypes.c_ulonglong is a simple data type representing an unsigned 64-bit integer.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
|
|
13
23
|
# Define GUID structure
|
|
14
24
|
class GUID(ctypes.Structure):
|
|
15
25
|
_fields_ = [
|
|
@@ -60,6 +70,65 @@ class EVENT_TRACE_PROPERTIES(ctypes.Structure):
|
|
|
60
70
|
]
|
|
61
71
|
|
|
62
72
|
|
|
73
|
+
# Define the EVENT_TRACE_LOGFILE structure
|
|
74
|
+
class EVENT_TRACE_LOGFILE(ctypes.Structure):
    """
    ctypes declaration of the trace log file description that is passed by pointer to 'OpenTrace'.

    NOTE(review): the field list differs from the EVENT_TRACE_LOGFILEW layout in the Windows SDK documentation
    (which also contains the 'CurrentEvent', 'LogfileHeader' and 'BufferCallback' members) - confirm the layout
    before relying on values that the API writes into this structure.
    """
    _fields_ = [
        ("LogFileName", wintypes.LPWSTR),
        ("LoggerName", wintypes.LPWSTR),
        ("CurrentTime", wintypes.LARGE_INTEGER),
        ("BuffersRead", wintypes.ULONG),
        ("ProcessTraceMode", wintypes.ULONG),
        # Callback address, stored as an untyped pointer.
        ("EventRecordCallback", wintypes.LPVOID),
        ("BufferSize", wintypes.ULONG),
        ("Filled", wintypes.ULONG),
        ("EventsLost", wintypes.ULONG),
        ("BuffersLost", wintypes.ULONG),
        ("RealTimeBuffersLost", wintypes.ULONG),
        ("LogBuffersLost", wintypes.ULONG),
        ("BuffersWritten", wintypes.ULONG),
        ("LogFileMode", wintypes.ULONG),
        ("IsKernelTrace", wintypes.ULONG),
        # Context pointer. It must be pointer-sized: a 32-bit ULONG cannot hold a pointer on 64-bit Windows.
        ("Context", wintypes.LPVOID)
    ]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Define the EVENT_TRACE_HEADER structure
|
|
96
|
+
class EVENT_TRACE_HEADER(ctypes.Structure):
    """
    ctypes declaration of the trace event header used as the first member of 'EVENT_RECORD'.

    NOTE(review): the field list does not appear to match the EVENT_TRACE_HEADER layout in the Windows SDK
    documentation - confirm before reading values from events delivered by the API.
    """
    _fields_ = [
        # Size, flags and classification of the event.
        ("Size", wintypes.USHORT),
        ("FieldTypeFlags", wintypes.USHORT),
        ("Version", wintypes.USHORT),
        ("Class", wintypes.USHORT),  # EVENT_TRACE_CLASS
        ("Type", ctypes.c_ubyte),
        ("Level", ctypes.c_ubyte),
        ("Channel", ctypes.c_ubyte),
        ("Flags", ctypes.c_ubyte),

        # Instance identification.
        ("InstanceId", wintypes.USHORT),
        ("ParentInstanceId", wintypes.USHORT),
        ("ParentGuid", GUID),
        ("Timestamp", wintypes.LARGE_INTEGER),
        ("Guid", GUID),

        # Origin of the event and time counters.
        ("ProcessorTime", wintypes.ULONG),
        ("ThreadId", wintypes.ULONG),
        ("ProcessId", wintypes.ULONG),
        ("KernelTime", wintypes.ULONG),
        ("UserTime", wintypes.ULONG),
    ]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Define the EVENT_RECORD structure
|
|
120
|
+
class EVENT_RECORD(ctypes.Structure):
    """
    ctypes declaration of a single event record: the header followed by counters and data pointers.

    NOTE(review): the documented EVENT_RECORD starts with an EVENT_HEADER member - confirm that
    'EVENT_TRACE_HEADER' is the intended header type here.
    """
    _fields_ = [
        ("EventHeader", EVENT_TRACE_HEADER),
        ("BufferContext", wintypes.ULONG),
        # Item count of the extended data and byte length of the user data.
        ("ExtendedDataCount", wintypes.USHORT),
        ("UserDataLength", wintypes.USHORT),
        # Untyped pointers to the payloads.
        ("ExtendedData", wintypes.LPVOID),
        ("UserData", wintypes.LPVOID),
        ("UserContext", wintypes.LPVOID)
    ]
|
|
130
|
+
|
|
131
|
+
|
|
63
132
|
class PROVIDER_ENUMERATION_INFO(ctypes.Structure):
|
|
64
133
|
_fields_ = [
|
|
65
134
|
("NumberOfProviders", ULONG),
|
|
@@ -92,3 +161,15 @@ QueryAllTraces.argtypes = [
|
|
|
92
161
|
ctypes.POINTER(wintypes.ULONG)
|
|
93
162
|
]
|
|
94
163
|
QueryAllTraces.restype = wintypes.ULONG
|
|
164
|
+
|
|
165
|
+
# Prototypes of the ETW consumer functions from advapi32.
# NOTE(review): the Windows SDK documents the trace handle (TRACEHANDLE) as a 64-bit value and the
# 'StartTime' / 'EndTime' arguments of ProcessTrace as pointers to FILETIME. The ULONG / LARGE_INTEGER
# declarations below look narrower than that - confirm against the callers before changing them.

# OpenTraceW: takes a pointer to EVENT_TRACE_LOGFILE, returns the trace handle.
OpenTrace = advapi32.OpenTraceW
OpenTrace.restype = wintypes.ULONG
OpenTrace.argtypes = [ctypes.POINTER(EVENT_TRACE_LOGFILE)]

# ProcessTrace: handle array, handle count, start time, end time.
ProcessTrace = advapi32.ProcessTrace
ProcessTrace.restype = wintypes.ULONG
ProcessTrace.argtypes = [
    ctypes.POINTER(wintypes.ULONG),
    wintypes.ULONG,
    wintypes.LARGE_INTEGER,
    wintypes.LARGE_INTEGER,
]

# CloseTrace: takes the handle returned by OpenTrace.
CloseTrace = advapi32.CloseTrace
CloseTrace.restype = wintypes.ULONG
CloseTrace.argtypes = [wintypes.ULONG]
|
|
@@ -6,10 +6,6 @@ from ... import filesystem, datetimes
|
|
|
6
6
|
from ...file_io import csvs
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
READING_EXISTING_LINES: list = []
|
|
10
|
-
EXISTING_LOGS_FILE_COUNT: int = 0
|
|
11
|
-
|
|
12
|
-
|
|
13
9
|
def get_logs_paths(
|
|
14
10
|
log_files_directory_path: str = None,
|
|
15
11
|
log_file_path: str = None,
|
|
@@ -212,31 +208,10 @@ def get_logs(
|
|
|
212
208
|
return logs_content
|
|
213
209
|
|
|
214
210
|
|
|
215
|
-
|
|
216
|
-
log_file_path: str,
|
|
217
|
-
date_pattern: str = None,
|
|
218
|
-
log_type: Literal['csv'] = 'csv',
|
|
219
|
-
get_previous_file: bool = False,
|
|
220
|
-
header: list = None
|
|
221
|
-
) -> tuple:
|
|
211
|
+
class LogReader:
|
|
222
212
|
"""
|
|
223
|
-
This
|
|
213
|
+
This class gets the latest lines from the log file.
|
|
224
214
|
|
|
225
|
-
:param log_file_path: Path to the log file.
|
|
226
|
-
:param date_pattern: Pattern to match the date in the log file name.
|
|
227
|
-
If specified, the function will get the log file by the date pattern.
|
|
228
|
-
If not specified, the function will get the file date by file last modified time.
|
|
229
|
-
:param log_type: Type of log to get.
|
|
230
|
-
:param get_previous_file: Boolean, if True, the function will get the previous log file.
|
|
231
|
-
For example, your log is set to rotate every Midnight.
|
|
232
|
-
Meaning, once the day will change, the function will get the log file from the previous day in the third entry
|
|
233
|
-
of the return tuple. This happens only once each 24 hours. Not from the time the function was called, but from
|
|
234
|
-
the time the day changed.
|
|
235
|
-
:param header: List of strings that will be the header of the CSV file. Default is 'None'.
|
|
236
|
-
None: the header from the CSV file will be used. The first row of the CSV file will be the header.
|
|
237
|
-
Meaning, that the first line will be skipped and the second line will be the first row of the content.
|
|
238
|
-
List: the list will be used as header.
|
|
239
|
-
All the lines of the CSV file will be considered as content.
|
|
240
215
|
return: List of new lines.
|
|
241
216
|
|
|
242
217
|
Usage:
|
|
@@ -246,14 +221,15 @@ def get_latest_lines(
|
|
|
246
221
|
# The header of the log file will be read from the first iteration of the log file.
|
|
247
222
|
# When the file is rotated, this header will be used to not read the header again.
|
|
248
223
|
header: Union[list, None] = None
|
|
224
|
+
log_reader = reading.LogReader(
|
|
225
|
+
log_file_path='/path/to/log.csv',
|
|
226
|
+
log_type='csv',
|
|
227
|
+
date_pattern='%Y_%m_%d',
|
|
228
|
+
get_previous_file=True,
|
|
229
|
+
header=header
|
|
230
|
+
)
|
|
249
231
|
while True:
|
|
250
|
-
latest_lines, previous_day_24h_lines, header =
|
|
251
|
-
log_file_path='/path/to/log.csv',
|
|
252
|
-
log_type='csv',
|
|
253
|
-
date_pattern='%Y_%m_%d',
|
|
254
|
-
get_previous_file=True,
|
|
255
|
-
header=header
|
|
256
|
-
)
|
|
232
|
+
latest_lines, previous_day_24h_lines, header = log_reader.get_latest_lines(header=header)
|
|
257
233
|
|
|
258
234
|
if latest_lines:
|
|
259
235
|
# Do something with the new lines.
|
|
@@ -262,91 +238,132 @@ def get_latest_lines(
|
|
|
262
238
|
# Do something with the last 24 hours lines. Reminder, this will happen once a day on log rotation.
|
|
263
239
|
|
|
264
240
|
time.sleep(1)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
def
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
def __init__(
        self,
        log_file_path: str,
        date_pattern: str = None,
        log_type: Literal['csv'] = 'csv',
        get_previous_file: bool = False,
        header: list = None
):
    """
    Store the reading settings and reset the reading state.

    :param log_file_path: Path to the log file.
    :param date_pattern: Pattern to match the date in the log file name.
        If specified, the log file is selected by the date pattern.
        If not specified, the file date is taken from the file last modified time.
    :param log_type: Type of log to get.
    :param get_previous_file: Boolean, if True, the content of the previous log file is returned as well.
        Example: the log rotates every midnight. Once the day changes, 'get_latest_lines' returns the lines
        of the previous day file as the second entry of its return tuple. This happens once per rotation,
        counted from the moment the day changed and not from the moment of the call.
    :param header: List of strings that will be the header of the CSV file. Default is 'None'.
        None: the first row of the CSV file is used as the header and is not part of the content.
        List: the list is used as the header and all the lines of the CSV file are considered content.
    """

    # Reading state: rows that were already returned and the number of log files seen on the last check.
    self._reading_existing_lines: list = []
    self._existing_logs_file_count: int = 0

    # Settings.
    self.log_file_path: str = log_file_path
    self.date_pattern: str = date_pattern
    self.log_type: Literal['csv'] = log_type
    self.get_previous_file: bool = get_previous_file
    self.header: list = header
|
|
277
|
+
|
|
278
|
+
def _extract_new_lines_only(self, content_lines: list):
    """
    Return the rows that were not returned before and remember them.

    :param content_lines: list, rows that were read from the log file.
    :return: list, rows of 'content_lines' that are not in 'self._reading_existing_lines', in their original
        order. The returned rows are appended to 'self._reading_existing_lines'.
    """
    already_seen: list = self._reading_existing_lines

    # The membership test runs against the rows known before this call, so a row that repeats inside
    # 'content_lines' is returned each time it appears.
    fresh_rows: list = [row for row in content_lines if row not in already_seen]

    already_seen.extend(fresh_rows)

    return fresh_rows
|
|
278
289
|
|
|
279
|
-
|
|
290
|
+
def get_latest_lines(self, header: list = None) -> tuple:
    """
    Read the log file and return only the rows that were not returned by the previous calls.

    :param header: list, if specified, replaces the header that is stored in the instance.
    :return: tuple of (new lines, previous file lines, header).
        new lines: list of rows that appeared since the previous call. After a rotation the unread rows of
            the previous file come first.
        previous file lines: list of all the rows of the previous file. Filled only on the call that detected
            the rotation and only if 'get_previous_file' is True, otherwise an empty list.
        header: the header that is currently stored in the instance.
    :raises ValueError: if 'log_type' is not 'csv'.
    """
    if header:
        self.header = header

    # If the existing logs file count is 0, it means that this is the first check. We need to get the current count.
    if self._existing_logs_file_count == 0:
        self._existing_logs_file_count = len(get_logs_paths(
            log_file_path=self.log_file_path,
            log_type='csv'
        ))

        # If the count is still 0, then there are no logs to read.
        if self._existing_logs_file_count == 0:
            return [], [], self.header

    if self.log_type != 'csv':
        raise ValueError('Only "csv" log type is supported.')

    previous_file_lines: list = []

    # Get the latest statistics file path.
    latest_statistics_file_path_object = get_logs_paths(
        log_file_path=self.log_file_path,
        date_pattern=self.date_pattern,
        log_type='csv',
        latest_only=True
    )

    # The query above also filters by 'date_pattern', so it can come back empty even when the plain count
    # is not zero. There is nothing to read in that case.
    if not latest_statistics_file_path_object:
        return [], [], self.header

    latest_statistics_file_path: str = latest_statistics_file_path_object[0]['file_path']

    # Get the previous day statistics file path.
    previous_day_statistics_file_path: Union[str, None] = None
    try:
        previous_day_statistics_file_path = get_logs_paths(
            log_file_path=self.log_file_path,
            date_pattern=self.date_pattern,
            log_type='csv',
            previous_day_only=True
        )[0]['file_path']
    # If you get IndexError, it means that there are no previous day logs to read.
    except IndexError:
        pass

    # Count all the rotated files.
    current_log_files_count: int = len(get_logs_paths(
        log_file_path=self.log_file_path,
        log_type='csv'
    ))

    # If the count of the log files is greater than the existing logs file count, it means that the rotation
    # happened. We will read the previous day statistics file.
    new_lines_from_previous_file: list = []
    if current_log_files_count > self._existing_logs_file_count:
        self._existing_logs_file_count = current_log_files_count

        # The previous file may not have been found (IndexError above). Reading a 'None' path would fail,
        # so the previous file is read only when its path is known.
        if previous_day_statistics_file_path is not None:
            current_lines, self.header = csvs.read_csv_to_list_of_dicts_by_header(
                previous_day_statistics_file_path, header=self.header, stdout=False)

            if self.get_previous_file:
                previous_file_lines = current_lines

            new_lines_from_previous_file = self._extract_new_lines_only(current_lines)

        # empty the previous file lines, since the file is rotated.
        self._reading_existing_lines.clear()

    current_lines, self.header = csvs.read_csv_to_list_of_dicts_by_header(
        latest_statistics_file_path, header=self.header, stdout=False)

    new_lines = self._extract_new_lines_only(current_lines)

    # If we have new lines from the previous file, we will add the new lines from the latest file.
    if new_lines_from_previous_file:
        new_lines = new_lines_from_previous_file + new_lines

    return new_lines, previous_file_lines, self.header
|
|
@@ -164,6 +164,15 @@ def filter_processes_with_present_connections(processes) -> list:
|
|
|
164
164
|
|
|
165
165
|
|
|
166
166
|
class PsutilProcesses:
|
|
167
|
+
"""
|
|
168
|
+
Class to get all the current processes.
|
|
169
|
+
|
|
170
|
+
Example: get the currently running processes as a dict of the form
|
|
171
|
+
{'<pid>': {'name': '<process_name>', 'cmdline': '<process_cmdline>'}}:
|
|
172
|
+
from atomicshop.wrappers.psutilw import psutilw
|
|
173
|
+
processes = psutilw.PsutilProcesses().get_processes_as_dict(
|
|
174
|
+
attrs=['pid', 'name', 'cmdline'], cmdline_to_string=True)
|
|
175
|
+
"""
|
|
167
176
|
def __init__(self):
|
|
168
177
|
self.processes = None
|
|
169
178
|
|