TSVZ 2.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
TSVZ.py ADDED
@@ -0,0 +1,669 @@
1
+ #!/usr/bin/env python3
2
+ import os
3
+ from collections import OrderedDict , deque
4
+ import time
5
+ import atexit
6
+ import threading
7
+
8
+ if os.name == 'nt':
9
+ import msvcrt
10
+ elif os.name == 'posix':
11
+ import fcntl
12
+
13
+ version = '2.57'
14
+
15
def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True):
    """
    Process one raw TSV line and apply it to the task dictionary.

    A row whose key (first column) is followed only by empty columns is treated
    as a deletion marker for that key, unless the table has a single column.

    Parameters:
    line (bytes or str): The line to process. Bytes are decoded as UTF-8; str is used as is.
    taskDic (dict): The dictionary to update. Rows are stored as taskDic[key] = [key, col2, ...].
    correctColumnNum (int): The expected number of columns, or -1 to detect it from this line.
    verbose (bool, optional): Whether to print verbose output. Defaults to False.
    teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
    strict (bool, optional): If True, rows with a wrong column count are ignored.
        If False they are padded with '' / truncated to correctColumnNum. Defaults to True.

    Returns:
    tuple: (correctColumnNum, lineCache). lineCache is the list of columns that was stored
        in taskDic, or [] when the line was ignored or acted as a deletion marker.
    """
    # Accept already-decoded text as well as the raw bytes read from a binary file handle.
    if isinstance(line, (bytes, bytearray)):
        line = line.decode()
    # Only spaces and NUL padding are removed here; the line terminator is
    # removed later by the per-column strip().
    line = line.strip(' ').strip('\x00')
    if not line:
        if verbose:
            __teePrintOrNot(f"Ignoring empty line: {line}",teeLogger=teeLogger)
        return correctColumnNum , []
    # we throw away the lines that start with '#'
    if line.startswith('#'):
        if verbose:
            __teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
        return correctColumnNum , []
    lineCache = [segment.strip() for segment in line.split('\t')]
    if correctColumnNum == -1:
        # No header was supplied: the first parsed line defines the table width.
        if verbose:
            __teePrintOrNot(f"detected correctColumnNum: {len(lineCache)}",teeLogger=teeLogger)
        correctColumnNum = len(lineCache)
    key = lineCache[0]
    if not key:
        if verbose:
            __teePrintOrNot(f"Ignoring line with empty key: {line}",teeLogger=teeLogger)
        return correctColumnNum , []
    if len(lineCache) == 1 or not any(lineCache[1:]):
        if correctColumnNum == 1:
            # Single-column table: the bare key IS the record.
            taskDic[key] = lineCache
            return correctColumnNum , lineCache
        # Multi-column table: a key with no values marks the key as deleted.
        if verbose:
            __teePrintOrNot(f"Key {key} found with empty value, deleting such key's representaion",teeLogger=teeLogger)
        if key in taskDic:
            del taskDic[key]
        return correctColumnNum , []
    if len(lineCache) == correctColumnNum:
        taskDic[key] = lineCache
        if verbose:
            __teePrintOrNot(f"Key {key} added",teeLogger=teeLogger)
        return correctColumnNum , lineCache
    if strict:
        # we only interested in the lines that have the correct number of columns
        if verbose:
            __teePrintOrNot(f"Ignoring line with {len(lineCache)} columns: {line}",teeLogger=teeLogger)
        return correctColumnNum , []
    # fill / cut the line with empty entries til the correct number of columns
    if len(lineCache) < correctColumnNum:
        lineCache += ['']*(correctColumnNum-len(lineCache))
    else:
        lineCache = lineCache[:correctColumnNum]
    taskDic[key] = lineCache
    if verbose:
        __teePrintOrNot(f"Key {key} added after correction",teeLogger=teeLogger)
    return correctColumnNum, lineCache
80
+
81
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False):
    """
    Reads the last valid line from a file by scanning it backwards in chunks.

    Every non-blank line that is examined is passed through processLine, so
    taskDic is updated (or has keys deleted) as a side effect of the scan.

    Args:
        fileName (str): The name of the file to read.
        taskDic (dict): A dictionary to pass to processLine function.
        correctColumnNum (int): A column number to pass to processLine function.
        verbose (bool, optional): Whether to print verbose output. Defaults to False.
        teeLogger (optional): Logger to use for tee print. Defaults to None.
        strict (bool, optional): Whether to enforce strict processing. Defaults to False.

    Returns:
        list: The last valid line data processed by processLine, or an empty list if none found.
    """
    chunk_size = 1024  # Read in chunks of 1024 bytes
    if verbose:
        __teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
    with open(fileName, 'rb') as file:
        file.seek(0, os.SEEK_END)
        position = file.tell()
        buffer = b''

        while position > 0:
            # Read chunks from the end of the file
            read_size = min(chunk_size, position)
            position -= read_size
            file.seek(position)

            # Prepend new chunk to what is left over from the previous round
            buffer = file.read(read_size) + buffer
            lines = buffer.split(b'\n')

            # Until the start of the file has been reached, lines[0] may be only
            # the tail of a line cut by the chunk boundary; parsing it would yield
            # a truncated row, so it is deferred to the next round.
            first_complete = 0 if position == 0 else 1

            # Process complete lines from the last to the first
            for raw in reversed(lines[first_complete:]):
                if not raw.strip():  # Skip empty lines
                    continue
                correctColumnNum, lineCache = processLine(
                    raw,
                    taskDic,
                    correctColumnNum,
                    verbose=verbose,
                    teeLogger=teeLogger,
                    strict=strict
                )
                # If the line is valid, return it
                if lineCache and any(lineCache):
                    return lineCache

            # Keep the first (possibly incomplete) line in buffer for the next read
            buffer = lines[0]

    # Return empty list if no valid line found
    return []
140
+
141
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
    """
    Read a TSV (Tab-Separated Values) file and return the data as a dictionary.

    Parameters:
    - fileName (str): The path to the TSV file.
    - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
    - header (str or list, optional): The header of the TSV file. If a string, it should be a tab-separated list of column names. If a list, it should contain the column names. Defaults to ''.
    - createIfNotExist (bool, optional): Whether to create the file (containing only the header line) if it doesn't exist. Defaults to False.
    - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
    - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
    - verbose (bool, optional): Whether to print verbose output. Defaults to False.
    - taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to None (a new OrderedDict).
    - encoding (str, optional): The encoding of the file. None is treated as 'utf8'. Defaults to 'utf8'.
    - strict (bool, optional): Passed to processLine: if True, rows with a wrong column count are ignored; if False they are padded / truncated. Defaults to True.

    Returns:
    - OrderedDict: The dictionary containing the data from the TSV file.
    - list: When lastLineOnly is True, the columns of the last valid line instead ([] if none).

    Raises:
    - FileNotFoundError: If the file is not found and createIfNotExist is False.
    - Exception: If the first line of the file does not start with the expected header.

    """
    import codecs

    if taskDic is None:
        taskDic = OrderedDict()

    # TSVZed passes encoding=None; decoding needs a concrete codec name.
    codec = encoding or 'utf8'
    # processLine always decodes bytes as UTF-8, so lines in any other
    # encoding are transcoded to UTF-8 before being handed over.
    needsTranscode = codecs.lookup(codec).name != 'utf-8'

    header = header.strip() if isinstance(header, str) else '\t'.join(header)
    if not header.endswith('\n'):
        header += '\n'
    headerText = header.strip()

    if not os.path.isfile(fileName):
        if not createIfNotExist:
            __teePrintOrNot('File not found','error',teeLogger=teeLogger)
            raise FileNotFoundError("File not found")
        with open(fileName, mode ='w',encoding=encoding) as file:
            file.write(header)
        __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
        # The header line was just written, so it is consumed like any other header below.
        verifyHeader = True

    with open(fileName, mode ='rb') as file:
        if headerText:
            if verifyHeader:
                line = file.readline().decode(codec).strip()
                if verbose:
                    __teePrintOrNot(f"Header: {headerText}",teeLogger=teeLogger)
                    __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
                # Comparison ignores case and spaces; the file's header may have extra trailing text.
                if not line.lower().replace(' ','').startswith(headerText.lower().replace(' ','')):
                    __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{headerText}",teeLogger=teeLogger)
                    raise Exception("Data format error! Header mismatch")
            correctColumnNum = len(headerText.split('\t'))
            if verbose:
                __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
        else:
            # Unknown width: processLine detects it from the first line it parses.
            correctColumnNum = -1
        if lastLineOnly:
            # NOTE(review): this path re-opens the file and decodes as UTF-8 regardless of `encoding`;
            # it also scans the whole file, so a header-only file can yield the header row itself.
            lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
            if lineCache:
                taskDic[lineCache[0]] = lineCache
            return lineCache
        for rawLine in file:
            if needsTranscode:
                rawLine = rawLine.decode(codec).encode('utf-8')
            correctColumnNum, _ = processLine(rawLine,taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict)
    return taskDic
203
+
204
def __teePrintOrNot(message,level = 'info',teeLogger = None):
    """
    Emit a message through the tee logger when one is given, otherwise print it.

    Parameters:
    message (str): The text to emit.
    level (str, optional): The level handed to teeLogger.teelog. Defaults to 'info'.
    teeLogger (object, optional): An object exposing teelog(message, level). Defaults to None.

    Returns:
    None
    """
    try:
        if not teeLogger:
            print(message)
            return
        teeLogger.teelog(message,level)
    except Exception:
        # Logging must never break the caller: fall back to plain stdout.
        print(message)
223
+
224
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8'):
    """
    Append a line of data to a TSV file.

    Parameters:
    - fileName (str): The path of the TSV file.
    - lineToAppend (str or list): The line of data to append. If it is a string, it will be split by tabs ('\\t') to form a list. A list is copied, never modified; None fields become '' and other fields are converted with str().
    - teeLogger (optional): A logger object for logging messages.
    - header (str or list, optional): The header line to verify against. A list of column names is joined with tabs. If provided, it also fixes the number of columns the appended line is padded / truncated to.
    - createIfNotExist (bool, optional): If True, the file will be created (with the header line) if it does not exist. If False and the file does not exist, an exception will be raised.
    - verifyHeader (bool, optional): If True, the function will verify that the first line of the file starts with the provided header. If False, the header will not be verified.
    - verbose (bool, optional): If True, additional information will be printed during the execution.
    - encoding (str, optional): The encoding of the file. None is treated as 'utf8'.

    Raises:
    - FileNotFoundError: If the file does not exist and createIfNotExist is False.
    - Exception: If the existing header does not match the provided header.
    """
    codec = encoding or 'utf8'
    if not isinstance(header, str):
        header = '\t'.join(header)
    if not header.endswith('\n'):
        header += '\n'
    headerText = header.strip()

    if not os.path.isfile(fileName):
        if not createIfNotExist:
            __teePrintOrNot('File not found','error',teeLogger=teeLogger)
            raise FileNotFoundError("File not found")
        with open(fileName, mode ='w',encoding=encoding) as file:
            file.write(header)
        __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
        verifyHeader = True

    # Work on a private list so the caller's argument is never padded / truncated in place.
    if isinstance(lineToAppend, str):
        row = lineToAppend.strip().split('\t')
    else:
        row = ['' if field is None else str(field) for field in lineToAppend]

    with open(fileName, mode ='r+b') as file:
        if headerText:
            if verifyHeader:
                line = file.readline().decode(codec).strip()
                if verbose:
                    __teePrintOrNot(f"Header: {headerText}",teeLogger=teeLogger)
                    __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
                # Comparison ignores case and spaces; the file's header may have extra trailing text.
                if not line.lower().replace(' ','').startswith(headerText.lower().replace(' ','')):
                    __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{headerText}",teeLogger=teeLogger)
                    raise Exception("Data format error! Header mismatch")
            correctColumnNum = len(headerText.split('\t'))
            if verbose:
                __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
        else:
            correctColumnNum = len(row)
        # truncate / fill the row to the correct number of columns
        if len(row) < correctColumnNum:
            row += ['']*(correctColumnNum-len(row))
        elif len(row) > correctColumnNum:
            row = row[:correctColumnNum]
        # check if the file ends in a newline; an empty file has no last byte
        # to inspect, and seeking to -1 there would raise OSError
        file.seek(0, os.SEEK_END)
        if file.tell() > 0:
            file.seek(-1, os.SEEK_END)
            if file.read(1) != b'\n':
                file.write(b'\n')
        file.write('\t'.join(row).encode(codec) + b'\n')
        if verbose:
            __teePrintOrNot(f"Appended {row} to {fileName}",teeLogger=teeLogger)
283
+
284
+ # create a tsv class that functions like a ordered dictionary but will update the file when modified
285
class TSVZed(OrderedDict):
    """
    An OrderedDict backed by a TSV file.

    Every record is stored as self[key] = [key, col2, col3, ...]. Changes made
    through the mapping interface are queued as lines and appended to the file
    by a background thread; a deletion is recorded by appending the key followed
    by empty columns. The whole file can be rewritten from memory with rewrite().

    Limitations visible in this implementation:
    - Values are written verbatim; embedded tabs or newlines are not escaped.
    - In a single-column table a deletion line is indistinguishable from a
      record line, so deletions only persist after a full rewrite.
    """

    def __teePrintOrNot(self,message,level = 'info'):
        """Log through self.teeLogger when set, otherwise print; never raise from the logger."""
        try:
            if self.teeLogger:
                self.teeLogger.teelog(message,level)
            else:
                print(message)
        except Exception:
            print(message)

    def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = None):
        """
        Load fileName into memory and start the background append thread.

        Parameters:
        - fileName (str): Path of the backing TSV file.
        - teeLogger (optional): Object exposing teelog(message, level); print is used when None.
        - header (str or list, optional): Tab separated header line or list of column names.
        - createIfNotExist (bool, optional): Create the file when it is missing.
        - verifyHeader (bool, optional): Verify the first line of the file against header.
        - rewrite_on_load (bool, optional): Rewrite the file from memory right after loading.
        - rewrite_on_exit (bool, optional): Force a rewrite when the append thread is stopped.
        - rewrite_interval (float, optional): Minimum age in seconds of the file before the
          worker rewrites a dirty table; 0 disables periodic rewrites.
        - append_check_delay (float, optional): Sleep in seconds between worker iterations.
        - monitor_external_changes (bool, optional): Default for rewrite()'s
          reloadInternalFromFile (re-read the file before rewriting it).
        - verbose (bool, optional): Print progress messages.
        - encoding (str, optional): File encoding; None means 'utf8'.
        """
        super().__init__()
        self.version = version
        self._fileName = fileName
        self.teeLogger = teeLogger
        self.header = header.strip() if isinstance(header, str) else '\t'.join(header)
        self.correctColumnNum = -1
        self.createIfNotExist = createIfNotExist
        self.verifyHeader = verifyHeader
        self.rewrite_on_load = rewrite_on_load
        self.rewrite_on_exit = rewrite_on_exit
        self.rewrite_interval = rewrite_interval
        self.monitor_external_changes = monitor_external_changes
        self.verbose = verbose
        if append_check_delay < 0:
            append_check_delay = 0.00001
            self.__teePrintOrNot('append_check_delay cannot be less than 0, setting it to 0.00001','error')
        self.append_check_delay = append_check_delay
        self.appendQueue = deque()
        self.dirty = False
        self.deSynced = False
        self.memoryOnly = False
        self.encoding = encoding
        self.writeLock = threading.Lock()
        self.shutdownEvent = threading.Event()
        self.appendThread = threading.Thread(target=self._appendWorker,daemon=True)
        # Load before starting the worker: the worker calls rewrite(), which may
        # reload(), and must not run concurrently with the initial load.
        self.load()
        self.appendThread.start()
        atexit.register(self.stopAppendThread)

    def load(self):
        """Read the file into memory and, if rewrite_on_load is set, rewrite it compacted."""
        self.reload()
        if self.rewrite_on_load:
            self.rewrite(force = True,reloadInternalFromFile = False)
        return self

    def reload(self):
        """Discard the in-memory records and re-read them from the file. Returns self."""
        # While loading, item assignments must not be queued back to the file.
        previousMemoryOnly = self.memoryOnly
        self.memoryOnly = True
        try:
            if self.verbose:
                self.__teePrintOrNot(f"Loading {self._fileName}")
            super().clear()
            readTSV(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding or None)
            if self.verbose:
                self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
            if self.header and self.verifyHeader:
                self.correctColumnNum = len(self.header.split('\t'))
            elif self:
                # No verified header: take the width of the first loaded record.
                self.correctColumnNum = len(self[next(iter(self))])
            else:
                self.correctColumnNum = -1
            if self.verbose:
                self.__teePrintOrNot(f"correctColumnNum: {self.correctColumnNum}")
                self.__teePrintOrNot(f"TSVZed({self._fileName}) loaded")
        finally:
            # Restore even when reading fails, otherwise every later change
            # would silently stay in memory only.
            self.memoryOnly = previousMemoryOnly
        return self

    def __setitem__(self,key,value):
        """
        Store a record and queue it for appending to the file.

        key is converted to str and stripped; an empty key is rejected with an
        error message. value may be a tab separated str, an iterable of fields,
        or None (no fields). The key is prepended when it is not already the
        first field. In a multi-column table, assigning no fields deletes the key.

        Raises:
        - AssertionError: If the number of fields does not match correctColumnNum.
        """
        key = str(key).strip()
        if not key:
            self.__teePrintOrNot('Key cannot be empty','error')
            return
        if value is None:
            value = []
        elif isinstance(value, str):
            value = value.strip().split('\t')
        # sanitize the value: None becomes '', everything else its stripped str()
        # (so that 0 and False are kept instead of being blanked)
        value = ['' if segment is None else str(segment).strip() for segment in value]
        # the first field in value should be the key; add it if it is not there
        if not value or value[0] != key:
            value = [key]+value
        if len(value) == 1 and self.correctColumnNum > 1:
            # a bare key in a multi-column table means we want to clear / delete the key
            self.__delitem__(key)
            return
        # verify the value has the correct number of columns
        if self.correctColumnNum > 0:
            if len(value) != self.correctColumnNum:
                # raised explicitly so the check survives python -O
                raise AssertionError(f"Data format error! Expected {self.correctColumnNum} columns, but got {len(value) } columns")
        else:
            # width still unknown: the first record defines it
            self.correctColumnNum = len(value)
        if self.verbose:
            self.__teePrintOrNot(f"Setting {key} to {value}")

        if key in self and self[key] == value:
            if self.verbose:
                self.__teePrintOrNot(f"Key {key} already exists with the same value")
            return
        self.dirty = True
        # update the dictionary,
        super().__setitem__(key,value)
        if self.verbose:
            self.__teePrintOrNot(f"Key {key} updated")
        if self.memoryOnly:
            return
        if self.verbose:
            self.__teePrintOrNot(f"Appending {key} to the appendQueue")
        self.appendQueue.append('\t'.join(value))

    def __delitem__(self,key):
        """Remove key and queue a deletion line. A missing key is ignored (no KeyError)."""
        key = str(key).strip()
        if key not in self:
            if self.verbose:
                self.__teePrintOrNot(f"Key {key} not found")
            return
        super().__delitem__(key)
        if self.memoryOnly:
            return
        self.__appendEmptyLine(key)

    def __appendEmptyLine(self,key):
        """Queue the deletion marker for key: the key followed by empty columns."""
        self.dirty = True
        # The record is already gone from the dict when this runs, so the
        # width can only come from correctColumnNum.
        if self.correctColumnNum > 0:
            emptyLine = key+'\t'*(self.correctColumnNum-1)
        else:
            emptyLine = key
        if self.verbose:
            self.__teePrintOrNot(f"Appending {emptyLine} to the appendQueue")
        self.appendQueue.append(emptyLine)
        return self

    def clear(self):
        """Remove all records and, unless memoryOnly, truncate the file to its header."""
        super().clear()
        if self.verbose:
            self.__teePrintOrNot(f"Clearing {self._fileName}")
        if self.memoryOnly:
            return self
        self.clear_file()
        return self

    def clear_file(self):
        """Truncate the file, leaving only the header line when a header is set. Returns self."""
        try:
            file = self.get_file_obj('w')
            try:
                if self.header:
                    file.write(self.header+'\n')
            finally:
                # always give the file and the thread lock back, even if the write failed
                self.release_file_obj(file)
            if self.verbose:
                if self.header:
                    self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
                else:
                    self.__teePrintOrNot(f"File {self._fileName} cleared empty")
                self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
            self.dirty = False
            self.deSynced = False
        except Exception as e:
            self.__teePrintOrNot(f"Failed to write at clear_file() to {self._fileName}: {e}",'error')
            import traceback
            self.__teePrintOrNot(traceback.format_exc(),'error')
            self.deSynced = True
        return self

    def __enter__(self):
        return self

    def __exit__(self,exc_type,exc_value,traceback):
        """Stop the append thread. Returns False so exceptions from the with-body propagate."""
        self.stopAppendThread()
        return False

    def __repr__(self):
        return '\n'.join((
            'TSVZed(',
            f'file_name:{self._fileName}',
            f'teeLogger:{self.teeLogger}',
            f'header:{self.header}',
            f'correctColumnNum:{self.correctColumnNum}',
            f'createIfNotExist:{self.createIfNotExist}',
            f'verifyHeader:{self.verifyHeader}',
            f'rewrite_on_load:{self.rewrite_on_load}',
            f'rewrite_on_exit:{self.rewrite_on_exit}',
            f'rewrite_interval:{self.rewrite_interval}',
            f'append_check_delay:{self.append_check_delay}',
            f'appendQueueLength:{len(self.appendQueue)}',
            f'appendThreadAlive:{self.appendThread.is_alive()}',
            f'dirty:{self.dirty}',
            f'deSynced:{self.deSynced}',
            f'memoryOnly:{self.memoryOnly}',
            f'{dict(self)})',
        ))

    def close(self):
        """Stop the append thread after flushing pending changes. Returns self."""
        self.stopAppendThread()
        return self

    def __str__(self):
        return f"TSVZed({self._fileName},{dict(self)})"

    def __del__(self):
        # __init__ may have failed before the thread machinery existed.
        if getattr(self, 'shutdownEvent', None) is None:
            return
        self.stopAppendThread()

    def popitem(self, last=True):
        """Remove and return a (key, value) pair and queue the deletion line for it."""
        key, value = super().popitem(last)
        if not self.memoryOnly:
            self.__appendEmptyLine(key)
        return key, value

    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
        value. If key is not found, d is returned if given, otherwise KeyError
        is raised.

        '''
        if key not in self:
            if default is self.__marker:
                raise KeyError(key)
            return default
        value = super().pop(key)
        if not self.memoryOnly:
            self.__appendEmptyLine(key)
        return value

    def move_to_end(self, key, last=True):
        '''Move an existing element to the end (or beginning if last is false).
        Raise KeyError if the element does not exist.

        The new order cannot be expressed by appending lines, so the table is
        marked dirty and rewrite_on_exit is switched on.
        '''
        super().move_to_end(key, last)
        self.dirty = True
        if not self.rewrite_on_exit:
            self.rewrite_on_exit = True
            self.__teePrintOrNot(f"Warning: move_to_end had been called. Need to resync for changes to apply to disk.")
            self.__teePrintOrNot(f"rewrite_on_exit set to True")
        if self.verbose:
            self.__teePrintOrNot(f"Warning: Trying to move Key {key} moved to {'end' if last else 'beginning'} Need to resync for changes to apply to disk")
        return self

    @classmethod
    def fromkeys(cls, iterable, value=None,fileName = None,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,verbose = False):
        '''Create a new ordered dictionary with keys from iterable and values set to value.
        '''
        # Keyword arguments keep each option in its own slot of __init__.
        self = cls(
            fileName,
            teeLogger = teeLogger,
            header = header,
            createIfNotExist = createIfNotExist,
            verifyHeader = verifyHeader,
            rewrite_on_load = rewrite_on_load,
            rewrite_on_exit = rewrite_on_exit,
            rewrite_interval = rewrite_interval,
            append_check_delay = append_check_delay,
            verbose = verbose,
        )
        for key in iterable:
            self[key] = value
        return self

    def rewrite(self,force = False,reloadInternalFromFile = None):
        """
        Rewrite the whole file from the in-memory records.

        Without force, nothing happens unless the table is dirty and either
        de-synced or rewrite_interval is non-zero and the file is older than it.

        Parameters:
        - force (bool, optional): Rewrite unconditionally.
        - reloadInternalFromFile (bool, optional): Commit pending appends and re-read the
          file before rewriting. None means use monitor_external_changes.

        Returns:
        - bool: True if a rewrite was attempted, False if skipped or an exception was raised.
        """
        if not self.dirty and not force:
            return False
        if not self.deSynced and not force:
            if self.rewrite_interval == 0 or time.time() - os.path.getmtime(self._fileName) < self.rewrite_interval:
                return False
        try:
            if self.verbose:
                self.__teePrintOrNot(f"Rewriting {self._fileName}")
            if reloadInternalFromFile is None:
                reloadInternalFromFile = self.monitor_external_changes
            if reloadInternalFromFile:
                # this will be needed if more than 1 process is accessing the file
                self.commitAppendToFile()
                self.reload()
            self.mapToFile()
            if self.verbose:
                self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
            if not self.appendThread.is_alive():
                # no worker to drain the queue, so do it here
                self.commitAppendToFile()
            return True
        except Exception as e:
            self.__teePrintOrNot(f"Failed to write at sync() to {self._fileName}: {e}",'error')
            import traceback
            self.__teePrintOrNot(traceback.format_exc(),'error')
            self.deSynced = True
            return False

    def mapToFile(self):
        """Overwrite the file with the header and all in-memory records. Returns self."""
        try:
            file = self.get_file_obj('w')
            try:
                if self.header:
                    file.write(self.header+'\n')
                for key in self:
                    file.write('\t'.join(self[key])+'\n')
            finally:
                # always give the file and the thread lock back, even if a write failed
                self.release_file_obj(file)
            if self.verbose:
                self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
                self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
            self.dirty = False
            self.deSynced = False
        except Exception as e:
            self.__teePrintOrNot(f"Failed to write at dumpToFile() to {self._fileName}: {e}",'error')
            import traceback
            self.__teePrintOrNot(traceback.format_exc(),'error')
            self.deSynced = True
        return self

    def _appendWorker(self):
        """Background loop: rewrite when due, append queued lines, sleep; flush once more on shutdown."""
        while not self.shutdownEvent.is_set():
            self.rewrite()
            self.commitAppendToFile()
            time.sleep(self.append_check_delay)
        if self.verbose:
            self.__teePrintOrNot(f"Append worker for {self._fileName} shut down")
        self.commitAppendToFile()

    def commitAppendToFile(self):
        """Append every queued line to the file. On failure the table is marked de-synced. Returns self."""
        if self.appendQueue:
            try:
                if self.verbose:
                    self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
                    self.__teePrintOrNot(f"Before size of {self._fileName}: {os.path.getsize(self._fileName)}")
                file = self.get_file_obj('a')
                try:
                    while self.appendQueue:
                        line = self.appendQueue.popleft()
                        file.write(line+'\n')
                finally:
                    # always give the file and the thread lock back, even if a write failed
                    self.release_file_obj(file)
                if self.verbose:
                    self.__teePrintOrNot(f"Records commited to {self._fileName}")
                    self.__teePrintOrNot(f"After size of {self._fileName}: {os.path.getsize(self._fileName)}")
            except Exception as e:
                self.__teePrintOrNot(f"Failed to write at commitAppendToFile to {self._fileName}: {e}",'error')
                import traceback
                self.__teePrintOrNot(traceback.format_exc(),'error')
                self.deSynced = True
        return self

    def stopAppendThread(self):
        """Run a final rewrite check, stop the worker thread and flush the queue. Safe to call repeatedly."""
        if self.shutdownEvent.is_set():
            return
        self.rewrite(force=self.rewrite_on_exit) # Ensure any final sync operations are performed
        self.shutdownEvent.set() # Signal the append thread to shut down
        if self.appendThread.is_alive():
            self.appendThread.join() # Wait for the append thread to complete
        # Covers the case where the worker was never started or had already died.
        self.commitAppendToFile()
        if self.verbose:
            self.__teePrintOrNot(f"Append thread for {self._fileName} stopped")

    def get_file_obj(self,modes = 'a'):
        """
        Open the backing file in text mode and lock it for exclusive use.

        Acquires self.writeLock (thread level) and an OS level file lock. The
        caller must hand the returned file to release_file_obj(). Note that mode
        'w' truncates the file when it is opened, before the OS lock is taken.

        Raises whatever open() or the locking call raises, after releasing writeLock.
        """
        self.writeLock.acquire()
        file = None
        try:
            if not self.encoding:
                self.encoding = 'utf8'
            file = open(self._fileName, mode=modes, encoding=self.encoding)
            # Lock the file after opening
            if os.name == 'posix':
                fcntl.lockf(file, fcntl.LOCK_EX)
            elif os.name == 'nt':
                # For Windows, locking the entire file, avoiding locking an empty file
                # NOTE(review): msvcrt.locking works from the current file position; confirm
                # the locked region matches the one unlocked in release_file_obj.
                lock_length = max(1, os.path.getsize(self._fileName))
                msvcrt.locking(file.fileno(), msvcrt.LK_LOCK, lock_length)
            if self.verbose:
                self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
        except Exception:
            if file is not None:
                file.close() # do not leak the handle when locking failed
            self.writeLock.release() # Release the thread lock in case of an error
            raise # Re-raise the exception to handle it outside or notify the user
        return file

    def release_file_obj(self,file):
        """Flush, unlock and close a file obtained from get_file_obj(), then release writeLock."""
        try:
            try:
                # Push buffered text to the OS while the file lock is still held,
                # so another process cannot write in between unlock and close.
                file.flush()
                if os.name == 'posix':
                    fcntl.lockf(file, fcntl.LOCK_UN)
                elif os.name == 'nt':
                    # Unlocking the entire file; for Windows, ensure not unlocking an empty file
                    unlock_length = max(1, os.path.getsize(file.name))
                    msvcrt.locking(file.fileno(), msvcrt.LK_UNLCK, unlock_length)
            finally:
                file.close() # Ensure file is closed even if flushing / unlocking failed
            if self.verbose:
                self.__teePrintOrNot(f"File {file.name} unlocked / released")
        finally:
            self.writeLock.release() # Ensure the thread lock is always released