TSVZ 2.62__tar.gz → 2.66__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: TSVZ
3
- Version: 2.62
3
+ Version: 2.66
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: TSVZ
3
- Version: 2.62
3
+ Version: 2.66
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- import os
2
+ import os , sys
3
3
  from collections import OrderedDict , deque
4
4
  import time
5
5
  import atexit
@@ -10,7 +10,8 @@ if os.name == 'nt':
10
10
  elif os.name == 'posix':
11
11
  import fcntl
12
12
 
13
- version = '2.62'
13
+ version = '2.66'
14
+ author = 'pan@zopyr.us'
14
15
 
15
16
 
16
17
  def pretty_format_table(data):
@@ -37,6 +38,26 @@ def pretty_format_table(data):
37
38
  outTable.append(row_format.format(*row))
38
39
  return '\n'.join(outTable) + '\n'
39
40
 
41
+ def __teePrintOrNot(message,level = 'info',teeLogger = None):
42
+ """
43
+ Prints the given message or logs it using the provided teeLogger.
44
+
45
+ Parameters:
46
+ message (str): The message to be printed or logged.
47
+ level (str, optional): The log level. Defaults to 'info'.
48
+ teeLogger (object, optional): The logger object used for logging. Defaults to None.
49
+
50
+ Returns:
51
+ None
52
+ """
53
+ try:
54
+ if teeLogger:
55
+ teeLogger.teelog(message,level)
56
+ else:
57
+ print(message,flush=True)
58
+ except Exception as e:
59
+ print(message,flush=True)
60
+
40
61
  def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True):
41
62
  """
42
63
  Process a line of text and update the task dictionary.
@@ -183,32 +204,66 @@ def formatHeader(header,verbose = False,teeLogger = None):
183
204
  __teePrintOrNot('Invalid header, setting header to empty.','error',teeLogger=teeLogger)
184
205
  header = ''
185
206
  header = header.strip()
186
- if header:
187
- if not header.endswith('\n'):
188
- header += '\n'
189
- else:
190
- header = ''
207
+ # if header:
208
+ # if not header.endswith('\n'):
209
+ # header += '\n'
210
+ # else:
211
+ # header = ''
191
212
  return header
192
213
 
193
- def __teePrintOrNot(message,level = 'info',teeLogger = None):
214
+ def lineContainHeader(header,line,verbose = False,teeLogger = None,strict = False):
194
215
  """
195
- Prints the given message or logs it using the provided teeLogger.
216
+ Verify if a line contains the header.
196
217
 
197
218
  Parameters:
198
- message (str): The message to be printed or logged.
199
- level (str, optional): The log level. Defaults to 'info'.
200
- teeLogger (object, optional): The logger object used for logging. Defaults to None.
219
+ - header (str): The header string to verify.
220
+ - line (str): The line to verify against the header.
221
+ - verbose (bool, optional): Whether to print verbose output. Defaults to False.
222
+ - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
223
+ - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
201
224
 
202
225
  Returns:
203
- None
226
+ bool: True if the header matches the line, False otherwise.
204
227
  """
205
- try:
206
- if teeLogger:
207
- teeLogger.teelog(message,level)
228
+ if verbose:
229
+ __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
230
+ __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
231
+ if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
232
+ __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
233
+ if strict:
234
+ raise Exception("Data format error! Header mismatch")
235
+ return False
236
+ return True
237
+
238
+ def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header = '',encoding = 'utf8',strict = True):
239
+ """
240
+ Verify the existence of a TSV file.
241
+
242
+ Parameters:
243
+ - fileName (str): The path of the TSV file.
244
+ - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to True.
245
+ - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
246
+ - header (str, optional): The header line to verify against. Defaults to ''.
247
+ - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
248
+ - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
249
+
250
+ Returns:
251
+ bool: True if the file exists, False otherwise.
252
+ """
253
+ if not fileName.endswith('.tsv'):
254
+ __teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
255
+ if not os.path.isfile(fileName):
256
+ if createIfNotExist:
257
+ with open(fileName, mode ='w',encoding=encoding)as file:
258
+ file.write(header+'\n')
259
+ __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
260
+ return True
261
+ elif strict:
262
+ __teePrintOrNot('File not found','error',teeLogger=teeLogger)
263
+ raise Exception("File not found")
208
264
  else:
209
- print(message)
210
- except Exception as e:
211
- print(message)
265
+ return False
266
+ return True
212
267
 
213
268
  def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
214
269
  """
@@ -236,32 +291,17 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
236
291
  if taskDic is None:
237
292
  taskDic = {}
238
293
  header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
239
- if not os.path.isfile(fileName):
240
- if createIfNotExist:
241
- with open(fileName, mode ='w',encoding=encoding)as file:
242
- file.write(header)
243
- __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
244
- verifyHeader = True
245
- else:
246
- __teePrintOrNot('File not found','error',teeLogger=teeLogger)
247
- raise Exception("File not found")
294
+ if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
295
+ return taskDic
248
296
  with open(fileName, mode ='rb')as file:
297
+ correctColumnNum = -1
249
298
  if header.strip():
250
299
  if verifyHeader:
251
300
  line = file.readline().decode().strip()
252
- if verbose:
253
- __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
254
- __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
255
- #assert line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')), "Data format error!"
256
- if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
257
- __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
258
- if strict:
259
- raise Exception("Data format error! Header mismatch")
260
- correctColumnNum = len(header.strip().split('\t'))
261
- if verbose:
262
- __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
263
- else:
264
- correctColumnNum = -1
301
+ if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
302
+ correctColumnNum = len(header.strip().split('\t'))
303
+ if verbose:
304
+ __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
265
305
  if lastLineOnly:
266
306
  lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
267
307
  if lineCache:
@@ -289,36 +329,20 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
289
329
  - Exception: If the existing header does not match the provided header.
290
330
  """
291
331
  header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
292
- if not os.path.isfile(fileName):
293
- if createIfNotExist:
294
- with open(fileName, mode ='w',encoding=encoding)as file:
295
- file.write(header)
296
- __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
297
- verifyHeader = True
298
- else:
299
- __teePrintOrNot('File not found','error',teeLogger=teeLogger)
300
- raise Exception("File not found")
301
-
332
+ if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
333
+ return
302
334
  if type(lineToAppend) == str:
303
335
  lineToAppend = lineToAppend.strip().split('\t')
304
336
 
305
337
  with open(fileName, mode ='r+b')as file:
338
+ correctColumnNum = len(lineToAppend)
306
339
  if header.strip():
307
340
  if verifyHeader:
308
341
  line = file.readline().decode().strip()
309
- if verbose:
310
- __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
311
- __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
312
- #assert line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')), "Data format error!"
313
- if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
314
- __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
315
- if strict:
316
- raise Exception("Data format error! Header mismatch")
317
- correctColumnNum = len(header.strip().split('\t'))
318
- if verbose:
319
- __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
320
- else:
321
- correctColumnNum = len(lineToAppend)
342
+ if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
343
+ correctColumnNum = len(header.strip().split('\t'))
344
+ if verbose:
345
+ __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
322
346
  # truncate / fill the lineToAppend to the correct number of columns
323
347
  if len(lineToAppend) < correctColumnNum:
324
348
  lineToAppend += ['']*(correctColumnNum-len(lineToAppend))
@@ -332,7 +356,7 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
332
356
  if verbose:
333
357
  __teePrintOrNot(f"Appended {lineToAppend} to {fileName}",teeLogger=teeLogger)
334
358
 
335
- def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8'):
359
+ def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False):
336
360
  """
337
361
  Clear the contents of a TSV file. Will create if not exist.
338
362
  Parameters:
@@ -342,29 +366,30 @@ def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose
342
366
  - verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
343
367
  - verbose (bool, optional): If True, additional information will be printed during the execution.
344
368
  - encoding (str, optional): The encoding of the file.
369
+ - strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
345
370
  """
346
371
  header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
347
- if not os.path.isfile(fileName):
348
- with open(fileName, mode ='w',encoding=encoding)as file:
349
- file.write(header)
372
+ if not verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False):
373
+ raise Exception("Something catastrophic happened! File still not found after creation")
350
374
  else:
351
375
  with open(fileName, mode ='r+',encoding=encoding)as file:
352
376
  if header.strip() and verifyHeader:
353
377
  line = file.readline().strip()
354
- if verbose:
355
- __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
356
- __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
357
- #assert line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')), "Data format error!"
358
- if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
359
- __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
360
- raise Exception("Data format error! Header mismatch")
361
- # if the header is correct, only keep the header
378
+ if not lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
379
+ __teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
362
380
  file.truncate()
363
381
  else:
364
- file.write(header)
382
+ file.write(header+'\n')
365
383
  if verbose:
366
384
  __teePrintOrNot(f"Cleared {fileName}",teeLogger=teeLogger)
367
385
 
386
+ def getFileUpdateTimeNs(fileName):
387
+ try:
388
+ return os.stat(fileName).st_mtime_ns
389
+ except:
390
+ __teePrintOrNot(f"Failed to get file update time for {fileName}",'error')
391
+ return time.time_ns()
392
+
368
393
  # create a tsv class that functions like a ordered dictionary but will update the file when modified
369
394
  class TSVZed(OrderedDict):
370
395
  def __teePrintOrNot(self,message,level = 'info'):
@@ -372,13 +397,15 @@ class TSVZed(OrderedDict):
372
397
  if self.teeLogger:
373
398
  self.teeLogger.teelog(message,level)
374
399
  else:
375
- print(message)
400
+ print(message,flush=True)
376
401
  except Exception as e:
377
- print(message)
402
+ print(message,flush=True)
378
403
 
379
404
  def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = None):
380
405
  super().__init__()
381
406
  self.version = version
407
+ self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
408
+ self.lastUpdateTime = self.externalFileUpdateTime
382
409
  self._fileName = fileName
383
410
  self.teeLogger = teeLogger
384
411
  self.header = formatHeader(header,verbose = verbose,teeLogger = self.teeLogger)
@@ -389,6 +416,8 @@ class TSVZed(OrderedDict):
389
416
  self.rewrite_on_exit = rewrite_on_exit
390
417
  self.rewrite_interval = rewrite_interval
391
418
  self.monitor_external_changes = monitor_external_changes
419
+ if not monitor_external_changes:
420
+ self.__teePrintOrNot(f"Warning: External changes monitoring disabled for {self._fileName}. Will overwrite external changes.",'warning')
392
421
  self.verbose = verbose
393
422
  if append_check_delay < 0:
394
423
  append_check_delay = 0.00001
@@ -429,6 +458,8 @@ class TSVZed(OrderedDict):
429
458
  #super().update(loadedData)
430
459
  if self.verbose:
431
460
  self.__teePrintOrNot(f"TSVZed({self._fileName}) loaded")
461
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
462
+ self.lastUpdateTime = self.externalFileUpdateTime
432
463
  self.memoryOnly = mo
433
464
  return self
434
465
 
@@ -472,6 +503,7 @@ class TSVZed(OrderedDict):
472
503
  if self.verbose:
473
504
  self.__teePrintOrNot(f"Appending {key} to the appendQueue")
474
505
  self.appendQueue.append('\t'.join(value))
506
+ self.lastUpdateTime = time.time_ns()
475
507
  # if not self.appendThread.is_alive():
476
508
  # self.commitAppendToFile()
477
509
  # else:
@@ -489,6 +521,7 @@ class TSVZed(OrderedDict):
489
521
  if self.memoryOnly:
490
522
  return
491
523
  self.__appendEmptyLine(key)
524
+ self.lastUpdateTime = time.time_ns()
492
525
 
493
526
  def __appendEmptyLine(self,key):
494
527
  self.dirty = True
@@ -512,6 +545,7 @@ class TSVZed(OrderedDict):
512
545
  if self.memoryOnly:
513
546
  return self
514
547
  self.clear_file()
548
+ self.lastUpdateTime = self.externalFileUpdateTime
515
549
  return self
516
550
 
517
551
  def clear_file(self):
@@ -532,6 +566,7 @@ class TSVZed(OrderedDict):
532
566
  self.dirty = False
533
567
  self.deSynced = False
534
568
  except Exception as e:
569
+ self.release_file_obj(file)
535
570
  self.__teePrintOrNot(f"Failed to write at clear_file() to {self._fileName}: {e}",'error')
536
571
  import traceback
537
572
  self.__teePrintOrNot(traceback.format_exc(),'error')
@@ -579,6 +614,7 @@ memoryOnly:{self.memoryOnly}
579
614
  key, value = super().popitem(last)
580
615
  if not self.memoryOnly:
581
616
  self.__appendEmptyLine(key)
617
+ self.lastUpdateTime = time.time_ns()
582
618
  return key, value
583
619
 
584
620
  __marker = object()
@@ -596,6 +632,7 @@ memoryOnly:{self.memoryOnly}
596
632
  value = super().pop(key)
597
633
  if not self.memoryOnly:
598
634
  self.__appendEmptyLine(key)
635
+ self.lastUpdateTime = time.time_ns()
599
636
  return value
600
637
 
601
638
  def move_to_end(self, key, last=True):
@@ -610,6 +647,7 @@ memoryOnly:{self.memoryOnly}
610
647
  self.__teePrintOrNot(f"rewrite_on_exit set to True")
611
648
  if self.verbose:
612
649
  self.__teePrintOrNot(f"Warning: Trying to move Key {key} moved to {'end' if last else 'beginning'} Need to resync for changes to apply to disk")
650
+ self.lastUpdateTime = time.time_ns()
613
651
  return self
614
652
 
615
653
  @classmethod
@@ -623,23 +661,29 @@ memoryOnly:{self.memoryOnly}
623
661
 
624
662
 
625
663
  def rewrite(self,force = False,reloadInternalFromFile = None):
626
- if not self.dirty and not force:
627
- return False
628
664
  if not self.deSynced and not force:
665
+ if not self.dirty:
666
+ return False
629
667
  if self.rewrite_interval == 0 or time.time() - os.path.getmtime(self._fileName) < self.rewrite_interval:
630
668
  return False
631
669
  try:
632
- if self.verbose:
633
- self.__teePrintOrNot(f"Rewriting {self._fileName}")
670
+
634
671
  if reloadInternalFromFile is None:
635
672
  reloadInternalFromFile = self.monitor_external_changes
636
- if reloadInternalFromFile:
673
+ if reloadInternalFromFile and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
637
674
  # this will be needed if more than 1 process is accessing the file
638
675
  self.commitAppendToFile()
639
676
  self.reload()
640
- self.mapToFile()
641
- if self.verbose:
642
- self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
677
+ if self.memoryOnly:
678
+ if self.verbose:
679
+ self.__teePrintOrNot(f"Memory only mode. Map to file skipped.")
680
+ return False
681
+ if self.dirty:
682
+ if self.verbose:
683
+ self.__teePrintOrNot(f"Rewriting {self._fileName}")
684
+ self.mapToFile()
685
+ if self.verbose:
686
+ self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
643
687
  if not self.appendThread.is_alive():
644
688
  self.commitAppendToFile()
645
689
  # else:
@@ -652,8 +696,10 @@ memoryOnly:{self.memoryOnly}
652
696
  self.deSynced = True
653
697
  return False
654
698
 
655
- def mapToFile(self):
699
+ def oldMapToFile(self):
656
700
  try:
701
+ if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
702
+ self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
657
703
  file = self.get_file_obj('w')
658
704
  if self.header:
659
705
  file.write(self.header+'\n')
@@ -666,14 +712,83 @@ memoryOnly:{self.memoryOnly}
666
712
  self.dirty = False
667
713
  self.deSynced = False
668
714
  except Exception as e:
669
- self.__teePrintOrNot(f"Failed to write at dumpToFile() to {self._fileName}: {e}",'error')
715
+ self.release_file_obj(file)
716
+ self.__teePrintOrNot(f"Failed to write at oldMapToFile() to {self._fileName}: {e}",'error')
670
717
  import traceback
671
718
  self.__teePrintOrNot(traceback.format_exc(),'error')
672
719
  self.deSynced = True
673
720
  return self
721
+
722
+ def mapToFile(self):
723
+ try:
724
+ if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
725
+ self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
726
+ file = self.get_file_obj('r+')
727
+ overWrite = False
728
+ line = file.readline()
729
+ aftPos = file.tell()
730
+ if self.header and not lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
731
+ file.seek(0)
732
+ file.write(self.header+'\n')
733
+ # if the header is not the same length as the line, we need to overwrite the file
734
+ if aftPos != file.tell():
735
+ overWrite = True
736
+ if self.verbose:
737
+ self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
738
+ for value in self.values():
739
+ strToWrite = '\t'.join(value)+'\n'
740
+ if overWrite:
741
+ if self.verbose:
742
+ self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
743
+ file.write(strToWrite)
744
+ continue
745
+ pos = file.tell()
746
+ line = file.readline()
747
+ aftPos = file.tell()
748
+ if not line or pos == aftPos:
749
+ if self.verbose:
750
+ self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
751
+ file.write(strToWrite)
752
+ overWrite = True
753
+ continue
754
+ if line != strToWrite:
755
+ if self.verbose:
756
+ self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
757
+ file.seek(pos)
758
+ # fill the string with space to write to the correct length
759
+ file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
760
+ if aftPos != file.tell():
761
+ overWrite = True
762
+ file.truncate()
763
+ self.release_file_obj(file)
764
+ if self.verbose:
765
+ self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
766
+ self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
767
+ self.dirty = False
768
+ self.deSynced = False
769
+ except Exception as e:
770
+ self.release_file_obj(file)
771
+ self.__teePrintOrNot(f"Failed to write at mapToFile() to {self._fileName}: {e}",'error')
772
+ import traceback
773
+ self.__teePrintOrNot(traceback.format_exc(),'error')
774
+ self.deSynced = True
775
+ return self
776
+
777
+ def checkExternalChanges(self):
778
+ if self.deSynced:
779
+ return self
780
+ realExternalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
781
+ if self.externalFileUpdateTime < realExternalFileUpdateTime:
782
+ self.deSynced = True
783
+ self.__teePrintOrNot(f"External changes detected in {self._fileName}")
784
+ elif self.externalFileUpdateTime > realExternalFileUpdateTime:
785
+ self.__teePrintOrNot(f"Time anomalies detected in {self._fileName}, resetting externalFileUpdateTime")
786
+ self.externalFileUpdateTime = realExternalFileUpdateTime
787
+ return self
674
788
 
675
789
  def _appendWorker(self):
676
790
  while not self.shutdownEvent.is_set():
791
+ self.checkExternalChanges()
677
792
  self.rewrite()
678
793
  self.commitAppendToFile()
679
794
  time.sleep(self.append_check_delay)
@@ -685,6 +800,11 @@ memoryOnly:{self.memoryOnly}
685
800
 
686
801
  def commitAppendToFile(self):
687
802
  if self.appendQueue:
803
+ if self.memoryOnly:
804
+ self.appendQueue.clear()
805
+ if self.verbose:
806
+ self.__teePrintOrNot(f"Memory only mode. Append queue cleared.")
807
+ return self
688
808
  try:
689
809
  if self.verbose:
690
810
  self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
@@ -698,6 +818,7 @@ memoryOnly:{self.memoryOnly}
698
818
  self.__teePrintOrNot(f"Records commited to {self._fileName}")
699
819
  self.__teePrintOrNot(f"After size of {self._fileName}: {os.path.getsize(self._fileName)}")
700
820
  except Exception as e:
821
+ self.release_file_obj(file)
701
822
  self.__teePrintOrNot(f"Failed to write at commitAppendToFile to {self._fileName}: {e}",'error')
702
823
  import traceback
703
824
  self.__teePrintOrNot(traceback.format_exc(),'error')
@@ -732,12 +853,20 @@ memoryOnly:{self.memoryOnly}
732
853
  if self.verbose:
733
854
  self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
734
855
  except Exception as e:
735
- self.writeLock.release() # Release the thread lock in case of an error
736
- raise e # Re-raise the exception to handle it outside or notify the user
856
+ try:
857
+ self.writeLock.release() # Release the thread lock in case of an error
858
+ except Exception as e:
859
+ self.__teePrintOrNot(f"Failed to release writeLock for {self._fileName}: {e}",'error')
860
+ self.__teePrintOrNot(f"Failed to open file {self._fileName}: {e}",'error')
737
861
  return file
738
862
 
739
863
  def release_file_obj(self,file):
864
+ # if write lock is already released, return
865
+ if not self.writeLock.locked():
866
+ return
740
867
  try:
868
+ file.flush() # Ensure the file is flushed before unlocking
869
+ os.fsync(file.fileno()) # Ensure the file is synced to disk before unlocking
741
870
  if os.name == 'posix':
742
871
  fcntl.lockf(file, fcntl.LOCK_UN)
743
872
  elif os.name == 'nt':
@@ -748,9 +877,16 @@ memoryOnly:{self.memoryOnly}
748
877
  if self.verbose:
749
878
  self.__teePrintOrNot(f"File {file.name} unlocked / released")
750
879
  except Exception as e:
751
- raise e # Re-raise the exception for external handling
752
- finally:
880
+ try:
881
+ self.writeLock.release() # Ensure the thread lock is always released
882
+ except Exception as e:
883
+ self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
884
+ self.__teePrintOrNot(f"Failed to release file {file.name}: {e}",'error')
885
+ try:
753
886
  self.writeLock.release() # Ensure the thread lock is always released
887
+ except Exception as e:
888
+ self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
889
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
754
890
 
755
891
 
756
892
  def __main__():
@@ -758,7 +894,7 @@ def __main__():
758
894
  parser = argparse.ArgumentParser(description='TSVZed: A TSV file manager')
759
895
  parser.add_argument('filename', type=str, help='The TSV file to read')
760
896
  parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
761
- parser.add_argument('line', type=str, nargs='*', help='The line to append to the TSV file. it follows as : \{key\} \{value1\} \{value2\} ... if a key without value be inserted, the value will get deleted.')
897
+ parser.add_argument('line', type=str, nargs='*', help='The line to append to the TSV file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
762
898
  parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the TSV file. seperate using \\t')
763
899
  parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
764
900
  parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name='TSVZ',
5
- version='2.62',
5
+ version='2.66',
6
6
  py_modules=['TSVZ'], # List of module names (without .py)
7
7
  description='An simple in memory wrapper around a TSV file to function as a database',
8
8
  author='Yufei Pan',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes