atomicshop 2.12.25__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of atomicshop might be problematic.

@@ -5,8 +5,10 @@ from typing import Literal, Union
 
 from .wrappers.pywin32w import wmi_win32process
 from .wrappers.psutilw import psutilw
+from .etws.traces import trace_sysmon_process_creation
 from .basics import list_of_dicts, dicts
 from .process_name_cmd import ProcessNameCmdline
+from .print_api import print_api
 
 
 def get_process_time_tester(
@@ -40,16 +42,24 @@ test = get_process_list.get_processes()
 
 
 class GetProcessList:
+    """
+    The class is responsible for getting the list of running processes.
+
+    Example of one time polling with 'pywin32' method:
+        from atomicshop import process_poller
+        process_list: dict = \
+            process_poller.GetProcessList(get_method='pywin32', connect_on_init=True).get_processes(as_dict=True)
+    """
     def __init__(
             self,
-            get_method: Literal['psutil', 'pywin32', 'process_dll'] = 'process_dll',
+            get_method: Literal['psutil', 'pywin32', 'process_dll', 'sysmon_etw'] = 'process_dll',
             connect_on_init: bool = False
     ):
         """
         :param get_method: str, The method to get the list of processes. Default is 'process_list_dll'.
             'psutil': Get the list of processes by 'psutil' library. Resource intensive and slow.
             'pywin32': Get the list of processes by 'pywin32' library, using WMI. Not resource intensive, but slow.
-            'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64
+            'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64.
         :param connect_on_init: bool, if True, will connect to the service on init. 'psutil' don't need to connect.
         """
         self.get_method = get_method
@@ -86,7 +96,7 @@ class GetProcessList:
         """
         The function will get the list of opened processes and return it as a list of dicts.
 
-        :return: list of dicts, of opened processes.
+        :return: dict while key is pid or list of dicts, of opened processes (depending on 'as_dict' setting).
         """
 
         if as_dict:
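
A minimal sketch of consuming the dict form described above, assuming the {pid: {'name': ..., 'cmdline': ...}} layout that the poller code later in this diff relies on; the exact keys of the list-of-dicts form are not shown here and this is illustrative rather than the package's documented example:

    from atomicshop import process_poller

    poller = process_poller.GetProcessList(get_method='pywin32', connect_on_init=True)

    # as_dict=True: mapping keyed by PID, one entry per process.
    by_pid: dict = poller.get_processes(as_dict=True)
    for pid, info in by_pid.items():
        print(pid, info['name'], info['cmdline'])
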
@@ -132,19 +142,14 @@ class ProcessPollerPool:
     Later, I'll find a solution to make it more efficient.
     """
     def __init__(
-            self, store_cycles: int = 200,
+            self,
             interval_seconds: Union[int, float] = 0,
             operation: Literal['thread', 'process'] = 'thread',
-            poller_method: Literal['psutil', 'pywin32', 'process_dll'] = 'process_dll',
+            poller_method: Literal['psutil', 'pywin32', 'process_dll', 'sysmon_etw'] = 'sysmon_etw',
+            sysmon_etw_session_name: str = None,
+            sysmon_directory: str = None
     ):
         """
-        :param store_cycles: int, how many cycles to store. Each cycle is polling processes.
-            Example: Specifying 3 will store last 3 polled cycles of processes.
-
-            Default is 200, which means that 200 latest cycles original PIDs and their process names will be stored.
-
-            You can execute the 'get_process_time_tester' function in order to find the optimal number of cycles
-            and how much time it will take.
         :param interval_seconds: float, how many seconds to wait between each cycle.
             Default is 0, which means that the polling will be as fast as possible.
 
@@ -162,6 +167,18 @@ class ProcessPollerPool:
             'psutil': Get the list of processes by 'psutil' library. Resource intensive and slow.
             'pywin32': Get the list of processes by 'pywin32' library, using WMI. Not resource intensive, but slow.
             'process_dll'. Not resource intensive and fast. Probably works only in Windows 10 x64.
+            'sysmon_etw': Get the list of processes with running SysMon by ETW - Event Tracing for Windows.
+                In this case 'store_cycles' and 'interval_seconds' are irrelevant, since the ETW is real-time.
+                Steps we take:
+                1. Check if SysMon is Running. If not, check if the executable exists in specified
+                    location and start it as a service.
+                2. Start the "Microsoft-Windows-Sysmon" ETW session.
+                3. Take a snapshot of current processes and their CMDs with psutil and store it in a dict.
+                4. Each new process creation from ETW updates the dict.
+        :param sysmon_etw_session_name: str, only for 'sysmon_etw' get_method.
+            The name of the ETW session for tracing process creation.
+        :param sysmon_directory: str, only for 'sysmon_etw' get_method.
+            The directory where the SysMon executable is located. If non-existed will be downloaded.
         ---------------------------------------------
         If there is an exception, ProcessPollerPool.processes will be set to the exception.
         While getting the processes you can use this to execute the exception:
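
An illustrative way to drive the new 'sysmon_etw' mode based on the parameters documented above. The session name and SysMon directory are hypothetical values, and the sketch assumes a Windows host where SysMon can be started (typically with administrator rights):

    from atomicshop import process_poller

    poller_pool = process_poller.ProcessPollerPool(
        poller_method='sysmon_etw',
        sysmon_etw_session_name='AtomicShopSysmonSession',  # hypothetical session name
        sysmon_directory=r'C:\Tools\Sysmon'                  # hypothetical directory
    )
    poller_pool.start()

    # The pool keeps the latest snapshot; an exception raised in the worker is surfaced here.
    processes = poller_pool.get_processes()
    if isinstance(processes, Exception):
        raise processes
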
@@ -172,20 +189,20 @@ class ProcessPollerPool:
             raise processes
         """
 
-        self.store_cycles: int = store_cycles
         self.interval_seconds: float = interval_seconds
         self.operation: str = operation
         self.poller_method = poller_method
-
-        self.get_processes_list = GetProcessList(get_method=self.poller_method)
+        self.sysmon_etw_session_name: str = sysmon_etw_session_name
+        self.sysmon_directory: str = sysmon_directory
 
         # Current process pool.
-        self.processes: dict = dict()
+        self._processes: dict = dict()
 
         # The variable is responsible to stop the thread if it is running.
-        self.running: bool = False
+        self._running: bool = False
 
-        self.queue = multiprocessing.Queue()
+        self._process_queue = multiprocessing.Queue()
+        self._running_state_queue = multiprocessing.Queue()
 
     def start(self):
         if self.operation == 'thread':
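
The constructor above replaces the single result queue with two queues: one that carries process snapshots from the worker, and one that carries the stop signal to it. A self-contained sketch of that handoff pattern using only the standard library (the names here are illustrative, not the package's):

    import multiprocessing
    import threading

    process_queue = multiprocessing.Queue()        # worker -> reader: latest snapshot
    running_state_queue = multiprocessing.Queue()  # owner -> worker: stop signal

    latest_snapshot: dict = {}

    def reader():
        global latest_snapshot
        while True:
            latest_snapshot = process_queue.get()

    threading.Thread(target=reader, daemon=True).start()

    process_queue.put({1234: {'name': 'example.exe', 'cmdline': ''}})
    running_state_queue.put(False)  # the equivalent of stop(): tells the worker loop to exit
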
@@ -195,66 +212,116 @@ class ProcessPollerPool:
         else:
             raise ValueError(f'Invalid operation type [{self.operation}]')
 
-    def stop(self):
-        self.running = False
-
-    def _start_thread(self):
-        self.running = True
-        # threading.Thread(target=self._worker, args=(self.process_polling_instance,)).start()
-        thread = threading.Thread(target=self._worker)
+        thread = threading.Thread(target=self._thread_get_queue)
         thread.daemon = True
         thread.start()
 
-    def _start_process(self):
-        self.running = True
-        multiprocessing.Process(target=self._worker).start()
+    def stop(self):
+        self._running = False
+        self._running_state_queue.put(False)
 
-        thread = threading.Thread(target=self._thread_get_queue)
+    def get_processes(self):
+        return self._processes
+
+    def _start_thread(self):
+        self._running = True
+
+        thread = threading.Thread(
+            target=_worker, args=(
+                self.poller_method, self._running_state_queue, self.interval_seconds,
+                self._process_queue, self.sysmon_etw_session_name, self.sysmon_directory,
+            )
+        )
         thread.daemon = True
         thread.start()
 
-    def _worker(self):
-        # We must initiate the connection inside the thread/process, because it is not thread-safe.
-        self.get_processes_list.connect()
-
-        exception = None
-        list_of_processes: list = list()
-        while self.running:
-            try:
-                # If the list is full (to specified 'store_cycles'), remove the first element.
-                if len(list_of_processes) == self.store_cycles:
-                    del list_of_processes[0]
-
-                # Get the current processes and reinitialize the instance of the dict.
-                current_processes: dict = dict(self.get_processes_list.get_processes())
+    def _start_process(self):
+        self._running = True
+        multiprocessing.Process(
+            target=_worker, args=(
+                self.poller_method, self._running_state_queue, self.interval_seconds,
+                self._process_queue, self.sysmon_etw_session_name, self.sysmon_directory,
+            )).start()
 
-                # Remove Command lines that contains only numbers, since they are useless.
-                for pid, process_info in current_processes.items():
-                    if process_info['cmdline'].isnumeric():
-                        current_processes[pid]['cmdline'] = str()
-                    elif process_info['cmdline'] == 'Error':
-                        current_processes[pid]['cmdline'] = str()
+    def _thread_get_queue(self):
+        while True:
+            self._processes = self._process_queue.get()
 
-                # Append the current processes to the list.
-                list_of_processes.append(current_processes)
 
-                # Merge all dicts in the list to one dict, updating with most recent PIDs.
-                self.processes = list_of_dicts.merge_to_dict(list_of_processes)
+def _worker(
+        poller_method, running_state_queue, interval_seconds, process_queue, sysmon_etw_session_name, sysmon_directory):
+    def _worker_to_get_running_state():
+        nonlocal running_state
+        running_state = running_state_queue.get()
 
-                if self.operation == 'process':
-                    self.queue.put(self.processes)
+    running_state: bool = True
 
-                time.sleep(self.interval_seconds)
-            except KeyboardInterrupt as e:
-                self.running = False
-                exception = e
-            except Exception as e:
-                self.running = False
-                exception = e
+    thread = threading.Thread(target=_worker_to_get_running_state)
+    thread.daemon = True
+    thread.start()
 
-        if not self.running:
-            self.queue.put(exception)
+    if poller_method == 'sysmon_etw':
+        poller_instance = trace_sysmon_process_creation.SysmonProcessCreationTrace(
+            attrs=['pid', 'original_file_name', 'command_line'],
+            session_name=sysmon_etw_session_name,
+            close_existing_session_name=True,
+            sysmon_directory=sysmon_directory
+        )
 
-    def _thread_get_queue(self):
-        while True:
-            self.processes = self.queue.get()
+        # We must initiate the connection inside the thread/process, because it is not thread-safe.
+        poller_instance.start()
+
+        processes = GetProcessList(get_method='pywin32', connect_on_init=True).get_processes(as_dict=True)
+        process_queue.put(processes)
+    else:
+        poller_instance = GetProcessList(get_method=poller_method)
+        poller_instance.connect()
+        processes = {}
+
+    exception = None
+    list_of_processes: list = list()
+    while running_state:
+        try:
+            if poller_method == 'sysmon_etw':
+                # Get the current processes and reinitialize the instance of the dict.
+                current_cycle: dict = poller_instance.emit()
+                current_processes: dict = {int(current_cycle['pid']): {
+                    'name': current_cycle['original_file_name'],
+                    'cmdline': current_cycle['command_line']}
+                }
+            else:
+                # Get the current processes and reinitialize the instance of the dict.
+                current_processes: dict = dict(poller_instance.get_processes())
+
+            # Remove Command lines that contains only numbers, since they are useless.
+            for pid, process_info in current_processes.items():
+                if process_info['cmdline'].isnumeric():
+                    current_processes[pid]['cmdline'] = str()
+                elif process_info['cmdline'] == 'Error':
+                    current_processes[pid]['cmdline'] = str()
+
+            # This loop is essential for keeping the command lines.
+            # When the process unloads from memory, the last polling will have only pid and executable name, but not
+            # the command line. This loop will keep the command line from the previous polling if this happens.
+            for pid, process_info in current_processes.items():
+                if pid in processes:
+                    if processes[pid]['name'] == current_processes[pid]['name']:
+                        if current_processes[pid]['cmdline'] == '':
+                            current_processes[pid]['cmdline'] = processes[pid]['cmdline']
+            processes.update(current_processes)
+
+            process_queue.put(processes)
+
+            # Since ETW is a blocking operation, we don't need to sleep.
+            if poller_method != 'sysmon_etw':
+                time.sleep(interval_seconds)
+        except KeyboardInterrupt as e:
+            running_state = False
+            exception = e
+        except Exception as e:
+            running_state = False
+            exception = e
+            print_api(f'Exception in ProcessPollerPool: {e}', color='red')
+
+    if not running_state:
+        process_queue.put(exception)
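
A standalone sketch of the command-line preservation step in the new _worker above: if a later poll of the same PID and name comes back with an empty cmdline, the previously seen cmdline is kept instead of being overwritten. The data values are made up for illustration:

    processes = {100: {'name': 'example.exe', 'cmdline': 'example.exe --flag'}}
    current_processes = {100: {'name': 'example.exe', 'cmdline': ''}}

    for pid, process_info in current_processes.items():
        previous = processes.get(pid)
        if previous and previous['name'] == process_info['name'] and process_info['cmdline'] == '':
            process_info['cmdline'] = previous['cmdline']
    processes.update(current_processes)

    print(processes)  # {100: {'name': 'example.exe', 'cmdline': 'example.exe --flag'}}
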
@@ -10,6 +10,16 @@ WNODE_FLAG_TRACED_GUID = 0x00020000
 MAXIMUM_LOGGERS = 64
 
 
+"""
+wintypes.DWORD = wintypes.ULONG = ctypes.c_ulong: 32-bit unsigned integer
+wintypes.WORD = wintypes.USHORT = ctypes.c_ushort: 16-bit unsigned integer
+wintypes.BYTE = ctypes.c_ubyte: 8-bit unsigned integer
+wintypes.LARGE_INTEGER is a structure (or union in C terms), can represent both signed and unsigned
+    64-bit values depending on context.
+ctypes.c_ulonglong is a simple data type representing an unsigned 64-bit integer.
+"""
+
+
 
 # Define GUID structure
 class GUID(ctypes.Structure):
@@ -60,6 +70,65 @@ class EVENT_TRACE_PROPERTIES(ctypes.Structure):
     ]
 
 
+# Define the EVENT_TRACE_LOGFILE structure
+class EVENT_TRACE_LOGFILE(ctypes.Structure):
+    _fields_ = [
+        ("LogFileName", wintypes.LPWSTR),
+        ("LoggerName", wintypes.LPWSTR),
+        ("CurrentTime", wintypes.LARGE_INTEGER),
+        ("BuffersRead", wintypes.ULONG),
+        ("ProcessTraceMode", wintypes.ULONG),
+        ("EventRecordCallback", wintypes.LPVOID),
+        ("BufferSize", wintypes.ULONG),
+        ("Filled", wintypes.ULONG),
+        ("EventsLost", wintypes.ULONG),
+        ("BuffersLost", wintypes.ULONG),
+        ("RealTimeBuffersLost", wintypes.ULONG),
+        ("LogBuffersLost", wintypes.ULONG),
+        ("BuffersWritten", wintypes.ULONG),
+        ("LogFileMode", wintypes.ULONG),
+        ("IsKernelTrace", wintypes.ULONG),
+        ("Context", wintypes.ULONG)  # Placeholder for context pointer
+    ]
+
+
+# Define the EVENT_TRACE_HEADER structure
+class EVENT_TRACE_HEADER(ctypes.Structure):
+    _fields_ = [
+        ("Size", wintypes.USHORT),
+        ("FieldTypeFlags", wintypes.USHORT),
+        ("Version", wintypes.USHORT),
+        ("Class", wintypes.USHORT),  # EVENT_TRACE_CLASS
+        ("Type", ctypes.c_ubyte),
+        ("Level", ctypes.c_ubyte),
+        ("Channel", ctypes.c_ubyte),
+        ("Flags", ctypes.c_ubyte),
+        ("InstanceId", wintypes.USHORT),
+        ("ParentInstanceId", wintypes.USHORT),
+        ("ParentGuid", GUID),
+        ("Timestamp", wintypes.LARGE_INTEGER),
+        ("Guid", GUID),
+        ("ProcessorTime", wintypes.ULONG),
+        ("ThreadId", wintypes.ULONG),
+        ("ProcessId", wintypes.ULONG),
+        ("KernelTime", wintypes.ULONG),
+        ("UserTime", wintypes.ULONG),
+    ]
+
+
+# Define the EVENT_RECORD structure
+class EVENT_RECORD(ctypes.Structure):
+    _fields_ = [
+        ("EventHeader", EVENT_TRACE_HEADER),
+        ("BufferContext", wintypes.ULONG),
+        ("ExtendedDataCount", wintypes.USHORT),
+        ("UserDataLength", wintypes.USHORT),
+        ("ExtendedData", wintypes.LPVOID),
+        ("UserData", wintypes.LPVOID),
+        ("UserContext", wintypes.LPVOID)
+    ]
+
+
 class PROVIDER_ENUMERATION_INFO(ctypes.Structure):
     _fields_ = [
         ("NumberOfProviders", ULONG),
@@ -92,3 +161,15 @@ QueryAllTraces.argtypes = [
     ctypes.POINTER(wintypes.ULONG)
 ]
 QueryAllTraces.restype = wintypes.ULONG
+
+OpenTrace = advapi32.OpenTraceW
+OpenTrace.argtypes = [ctypes.POINTER(EVENT_TRACE_LOGFILE)]
+OpenTrace.restype = wintypes.ULONG
+
+ProcessTrace = advapi32.ProcessTrace
+ProcessTrace.argtypes = [ctypes.POINTER(wintypes.ULONG), wintypes.ULONG, wintypes.LARGE_INTEGER, wintypes.LARGE_INTEGER]
+ProcessTrace.restype = wintypes.ULONG
+
+CloseTrace = advapi32.CloseTrace
+CloseTrace.argtypes = [wintypes.ULONG]
+CloseTrace.restype = wintypes.ULONG
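
For readers unfamiliar with the pattern, declaring argtypes and restype before calling is the standard ctypes way of giving a Windows API a signature. A minimal, Windows-only illustration using a simpler kernel32 function instead of the ETW calls above (which additionally need the structures defined earlier):

    import ctypes
    from ctypes import wintypes

    kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)

    # Declare the prototype, then call it like a normal Python function.
    GetCurrentProcessId = kernel32.GetCurrentProcessId
    GetCurrentProcessId.argtypes = []
    GetCurrentProcessId.restype = wintypes.DWORD

    print(GetCurrentProcessId())
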
@@ -6,10 +6,6 @@ from ... import filesystem, datetimes
 from ...file_io import csvs
 
 
-READING_EXISTING_LINES: list = []
-EXISTING_LOGS_FILE_COUNT: int = 0
-
-
 def get_logs_paths(
         log_files_directory_path: str = None,
         log_file_path: str = None,
@@ -212,31 +208,10 @@ def get_logs(
     return logs_content
 
 
-def get_latest_lines(
-        log_file_path: str,
-        date_pattern: str = None,
-        log_type: Literal['csv'] = 'csv',
-        get_previous_file: bool = False,
-        header: list = None
-) -> tuple:
+class LogReader:
     """
-    This function gets the latest lines from the log file.
+    This class gets the latest lines from the log file.
 
-    :param log_file_path: Path to the log file.
-    :param date_pattern: Pattern to match the date in the log file name.
-        If specified, the function will get the log file by the date pattern.
-        If not specified, the function will get the file date by file last modified time.
-    :param log_type: Type of log to get.
-    :param get_previous_file: Boolean, if True, the function will get the previous log file.
-        For example, your log is set to rotate every Midnight.
-        Meaning, once the day will change, the function will get the log file from the previous day in the third entry
-        of the return tuple. This happens only once each 24 hours. Not from the time the function was called, but from
-        the time the day changed.
-    :param header: List of strings that will be the header of the CSV file. Default is 'None'.
-        None: the header from the CSV file will be used. The first row of the CSV file will be the header.
-        Meaning, that the first line will be skipped and the second line will be the first row of the content.
-        List: the list will be used as header.
-        All the lines of the CSV file will be considered as content.
     return: List of new lines.
 
     Usage:
@@ -246,14 +221,15 @@ def get_latest_lines(
         # The header of the log file will be read from the first iteration of the log file.
         # When the file is rotated, this header will be used to not read the header again.
         header: Union[list, None] = None
+        log_reader = reading.LogReader(
+            log_file_path='/path/to/log.csv',
+            log_type='csv',
+            date_pattern='%Y_%m_%d',
+            get_previous_file=True,
+            header=header
+        )
         while True:
-            latest_lines, previous_day_24h_lines, header = reading.get_latest_lines(
-                log_file_path='/path/to/log.csv',
-                log_type='csv',
-                date_pattern='%Y_%m_%d',
-                get_previous_file=True,
-                header=header
-            )
+            latest_lines, previous_day_24h_lines, header = log_reader.get_latest_lines(header=header)
 
             if latest_lines:
                 # Do something with the new lines.
@@ -262,91 +238,132 @@ def get_latest_lines(
                 # Do something with the last 24 hours lines. Reminder, this will happen once a day on log rotation.
 
             time.sleep(1)
-    """
-
-    def extract_new_lines_only(content_lines: list):
+    """
+
+    def __init__(
+            self,
+            log_file_path: str,
+            date_pattern: str = None,
+            log_type: Literal['csv'] = 'csv',
+            get_previous_file: bool = False,
+            header: list = None
+    ):
+        """
+        :param log_file_path: Path to the log file.
+        :param date_pattern: Pattern to match the date in the log file name.
+            If specified, the function will get the log file by the date pattern.
+            If not specified, the function will get the file date by file last modified time.
+        :param log_type: Type of log to get.
+        :param get_previous_file: Boolean, if True, the function will get the previous log file.
+            For example, your log is set to rotate every Midnight.
+            Meaning, once the day will change, the function will get the log file from the previous day in the third entry
+            of the return tuple. This happens only once each 24 hours. Not from the time the function was called, but from
+            the time the day changed.
+        :param header: List of strings that will be the header of the CSV file. Default is 'None'.
+            None: the header from the CSV file will be used. The first row of the CSV file will be the header.
+            Meaning, that the first line will be skipped and the second line will be the first row of the content.
+            List: the list will be used as header.
+            All the lines of the CSV file will be considered as content.
+        """
+
+        self.log_file_path: str = log_file_path
+        self.date_pattern: str = date_pattern
+        self.log_type: Literal['csv'] = log_type
+        self.get_previous_file: bool = get_previous_file
+        self.header: list = header
+
+        self._reading_existing_lines: list = []
+        self._existing_logs_file_count: int = 0
+
+    def _extract_new_lines_only(self, content_lines: list):
         new_lines: list = []
         for row in content_lines:
             # If the row is not in the existing lines, then add it to the new lines.
-            if row not in READING_EXISTING_LINES:
+            if row not in self._reading_existing_lines:
                 new_lines.append(row)
 
         if new_lines:
-            READING_EXISTING_LINES.extend(new_lines)
+            self._reading_existing_lines.extend(new_lines)
 
         return new_lines
 
-    global EXISTING_LOGS_FILE_COUNT
+    def get_latest_lines(self, header: list = None) -> tuple:
+        if header:
+            self.header = header
 
-    # If the existing logs file count is 0, it means that this is the first check. We need to get the current count.
-    if EXISTING_LOGS_FILE_COUNT == 0:
-        EXISTING_LOGS_FILE_COUNT = len(get_logs_paths(
-            log_file_path=log_file_path,
-            log_type='csv'
-        ))
+        # If the existing logs file count is 0, it means that this is the first check. We need to get the current count.
+        if self._existing_logs_file_count == 0:
+            self._existing_logs_file_count = len(get_logs_paths(
+                log_file_path=self.log_file_path,
+                log_type='csv'
+            ))
 
-    # If the count is still 0, then there are no logs to read.
-    if EXISTING_LOGS_FILE_COUNT == 0:
-        return [], [], header
+        # If the count is still 0, then there are no logs to read.
+        if self._existing_logs_file_count == 0:
+            return [], [], self.header
 
-    if log_type != 'csv':
-        raise ValueError('Only "csv" log type is supported.')
+        if self.log_type != 'csv':
+            raise ValueError('Only "csv" log type is supported.')
 
-    previous_file_lines: list = []
+        previous_file_lines: list = []
 
-    # Get the latest statistics file path.
-    latest_statistics_file_path_object = get_logs_paths(
-        log_file_path=log_file_path,
-        date_pattern=date_pattern,
-        log_type='csv',
-        latest_only=True
-    )
-
-    latest_statistics_file_path: str = latest_statistics_file_path_object[0]['file_path']
-
-    # Get the previous day statistics file path.
-    previous_day_statistics_file_path: Union[str, None] = None
-    try:
-        previous_day_statistics_file_path = get_logs_paths(
-            log_file_path=log_file_path,
-            date_pattern=date_pattern,
+        # Get the latest statistics file path.
+        latest_statistics_file_path_object = get_logs_paths(
+            log_file_path=self.log_file_path,
+            date_pattern=self.date_pattern,
             log_type='csv',
-            previous_day_only=True
-        )[0]['file_path']
-    # If you get IndexError, it means that there are no previous day logs to read.
-    except IndexError:
-        pass
-
-    # Count all the rotated files.
-    current_log_files_count: int = len(get_logs_paths(
-        log_file_path=log_file_path,
-        log_type='csv'
-    ))
+            latest_only=True
+        )
+
+        # # If there are no logs to read, return empty lists.
+        # if not latest_statistics_file_path_object:
+        #     return [], [], self.header
+
+        latest_statistics_file_path: str = latest_statistics_file_path_object[0]['file_path']
+
+        # Get the previous day statistics file path.
+        previous_day_statistics_file_path: Union[str, None] = None
+        try:
+            previous_day_statistics_file_path = get_logs_paths(
+                log_file_path=self.log_file_path,
+                date_pattern=self.date_pattern,
+                log_type='csv',
+                previous_day_only=True
+            )[0]['file_path']
+        # If you get IndexError, it means that there are no previous day logs to read.
+        except IndexError:
+            pass
+
+        # Count all the rotated files.
+        current_log_files_count: int = len(get_logs_paths(
+            log_file_path=self.log_file_path,
+            log_type='csv'
+        ))
 
-    # If the count of the log files is greater than the existing logs file count, it means that the rotation happened.
-    # We will read the previous day statistics file.
-    new_lines_from_previous_file: list = []
-    if current_log_files_count > EXISTING_LOGS_FILE_COUNT:
-        current_lines, header = csvs.read_csv_to_list_of_dicts_by_header(
-            previous_day_statistics_file_path, header=header, stdout=False)
+        # If the count of the log files is greater than the existing logs file count, it means that the rotation
+        # happened. We will read the previous day statistics file.
+        new_lines_from_previous_file: list = []
+        if current_log_files_count > self._existing_logs_file_count:
+            current_lines, self.header = csvs.read_csv_to_list_of_dicts_by_header(
+                previous_day_statistics_file_path, header=self.header, stdout=False)
 
-        if get_previous_file:
-            previous_file_lines = current_lines
+            if self.get_previous_file:
+                previous_file_lines = current_lines
 
-        EXISTING_LOGS_FILE_COUNT = current_log_files_count
+            self._existing_logs_file_count = current_log_files_count
 
-        new_lines_from_previous_file = extract_new_lines_only(current_lines)
+            new_lines_from_previous_file = self._extract_new_lines_only(current_lines)
 
-        # empty the previous file lines, since the file is rotated.
-        READING_EXISTING_LINES.clear()
+            # empty the previous file lines, since the file is rotated.
+            self._reading_existing_lines.clear()
 
-    current_lines, header = csvs.read_csv_to_list_of_dicts_by_header(
-        latest_statistics_file_path, header=header, stdout=False)
+        current_lines, self.header = csvs.read_csv_to_list_of_dicts_by_header(
+            latest_statistics_file_path, header=self.header, stdout=False)
 
-    new_lines = extract_new_lines_only(current_lines)
+        new_lines = self._extract_new_lines_only(current_lines)
 
-    # If we have new lines from the previous file, we will add the new lines from the latest file.
-    if new_lines_from_previous_file:
-        new_lines = new_lines_from_previous_file + new_lines
+        # If we have new lines from the previous file, we will add the new lines from the latest file.
+        if new_lines_from_previous_file:
+            new_lines = new_lines_from_previous_file + new_lines
 
-    return new_lines, previous_file_lines, header
+        return new_lines, previous_file_lines, self.header
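
A standalone sketch of the "new lines only" bookkeeping that _extract_new_lines_only and _reading_existing_lines implement above: rows already returned are remembered, and only unseen rows come back on later reads. The names here are illustrative, not the package's API:

    seen_rows: list = []

    def extract_new_lines_only(content_lines: list) -> list:
        # Return only rows not seen in earlier reads, then remember them.
        new_lines = [row for row in content_lines if row not in seen_rows]
        seen_rows.extend(new_lines)
        return new_lines

    print(extract_new_lines_only(['a', 'b']))       # ['a', 'b']
    print(extract_new_lines_only(['a', 'b', 'c']))  # ['c']
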
@@ -164,6 +164,15 @@ def filter_processes_with_present_connections(processes) -> list:
 
 
 class PsutilProcesses:
+    """
+    Class to get all the current processes.
+
+    Example get current running processes as dicts as
+    {'<pid'>: {'name': '<process_name>', 'cmdline': '<process_cmdline>'}}:
+        from atomicshop.wrappers.psutilw import psutilw
+        processes = psutilw.PsutilProcesses().get_processes_as_dict(
+            attrs=['pid', 'name', 'cmdline'], cmdline_to_string=True)
+    """
     def __init__(self):
         self.processes = None
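
The docstring example above maps to roughly the following direct psutil usage. This is an approximation of the {pid: {'name': ..., 'cmdline': ...}} result implied by cmdline_to_string=True; the wrapper may differ in details such as error handling:

    import psutil

    processes: dict = {}
    for proc in psutil.process_iter(attrs=['pid', 'name', 'cmdline']):
        info = proc.info
        # cmdline comes back as a list of arguments; join it into a single string.
        processes[info['pid']] = {
            'name': info['name'],
            'cmdline': ' '.join(info['cmdline'] or []),
        }
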