TSVZ 3.25__py3-none-any.whl → 3.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
TSVZ.py CHANGED
@@ -22,13 +22,16 @@ if os.name == 'nt':
22
22
  elif os.name == 'posix':
23
23
  import fcntl
24
24
 
25
- version = '3.25'
25
+ version = '3.27'
26
26
  __version__ = version
27
27
  author = 'pan@zopyr.us'
28
+ COMMIT_DATE = '2025-06-25'
28
29
 
29
30
  DEFAULT_DELIMITER = '\t'
30
31
  DEFAULTS_INDICATOR_KEY = '#_defaults_#'
31
32
 
33
+ COMPRESSED_FILE_EXTENSIONS = ['gz','gzip','bz2','bzip2','xz','lzma']
34
+
32
35
  def get_delimiter(delimiter,file_name = ''):
33
36
  global DEFAULT_DELIMITER
34
37
  if not delimiter:
@@ -57,6 +60,43 @@ def get_delimiter(delimiter,file_name = ''):
57
60
  DEFAULT_DELIMITER = rtn
58
61
  return rtn
59
62
 
63
def openFileAsCompressed(fileName,mode = 'rb',encoding = 'utf8',teeLogger = None,compressLevel = 1):
    """
    Open a file, going through a compression module when the file name ends with a known compressed suffix.

    The backend is selected from the end of fileName:
    '.xz' / '.lzma' use lzma, '.gz' / '.gzip' use gzip, '.bz2' / '.bzip2' use bz2.
    Any other suffix, or any failure while importing / opening with the backend,
    falls back to the builtin open() on the raw file (a message is emitted through __teePrintOrNot on failure).

    Parameters:
    - fileName (str): The path to the file.
    - mode (str, optional): The open mode. Text mode is assumed when it does not contain 'b'. Defaults to 'rb'.
    - encoding (str, optional): The encoding used when the file is opened in text mode. Defaults to 'utf8'.
    - teeLogger (Logger, optional): The logger object handed to __teePrintOrNot for fallback messages. Defaults to None.
    - compressLevel (int, optional): The compression level used when the mode is not a read mode.
      Passed as 'preset' to lzma and as 'compresslevel' to gzip / bz2. Defaults to 1.

    Returns:
    - The file object returned by lzma.open / gzip.open / bz2.open, or by the builtin open() on fallback.

    Raises:
    - Whatever the builtin open() raises when the fallback open fails.
    """
    # the compression modules only open in text mode when 't' is explicit
    if 'b' not in mode and 't' not in mode:
        mode += 't'
    isLzma = fileName.endswith(('.xz','.lzma'))
    kwargs = {}
    if 'r' not in mode:
        # lzma.open() takes 'preset' and rejects 'compresslevel'; this applies to both .xz and .lzma names
        if isLzma:
            kwargs['preset'] = compressLevel
        else:
            kwargs['compresslevel'] = compressLevel
    if 'b' not in mode:
        kwargs['encoding'] = encoding
    if isLzma:
        try:
            import lzma
            return lzma.open(fileName, mode, **kwargs)
        except Exception:
            __teePrintOrNot(f"Failed to open {fileName} with lzma, trying bin",teeLogger=teeLogger)
    elif fileName.endswith(('.gz','.gzip')):
        try:
            import gzip
            return gzip.open(fileName, mode, **kwargs)
        except Exception:
            __teePrintOrNot(f"Failed to open {fileName} with gzip, trying bin",teeLogger=teeLogger)
    elif fileName.endswith(('.bz2','.bzip2')):
        try:
            import bz2
            return bz2.open(fileName, mode, **kwargs)
        except Exception:
            __teePrintOrNot(f"Failed to open {fileName} with bz2, trying bin",teeLogger=teeLogger)
    # plain file, or the compressed open failed: use the builtin open()
    if 't' in mode:
        mode = mode.replace('t','')
        return open(fileName, mode, encoding=encoding)
    # from here on the mode always contains 'b' (a 't' was added above otherwise)
    return open(fileName, mode)
98
+
99
+
60
100
  def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
61
101
  version = 1.11
62
102
  _ = version
@@ -392,7 +432,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
392
432
  delimiter = get_delimiter(delimiter,file_name=fileName)
393
433
  if verbose:
394
434
  __teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
395
- with open(fileName, 'rb') as file:
435
+ with openFileAsCompressed(fileName, 'rb',encoding=encoding, teeLogger=teeLogger) as file:
396
436
  file.seek(0, os.SEEK_END)
397
437
  file_size = file.tell()
398
438
  buffer = b''
@@ -416,7 +456,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
416
456
  if lines[i].strip(): # Skip empty lines
417
457
  # Process the line
418
458
  correctColumnNum, lineCache = _processLine(
419
- line=lines[i].decode(encoding=encoding),
459
+ line=lines[i].decode(encoding=encoding,errors='replace'),
420
460
  taskDic=taskDic,
421
461
  correctColumnNum=correctColumnNum,
422
462
  verbose=verbose,
@@ -503,19 +543,22 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
503
543
  Returns:
504
544
  bool: True if the file exists, False otherwise.
505
545
  """
506
- if delimiter and delimiter == '\t' and not fileName.endswith('.tsv'):
546
+ remainingFileName, _ ,extenstionName = fileName.rpartition('.')
547
+ if extenstionName in COMPRESSED_FILE_EXTENSIONS:
548
+ remainingFileName, _ ,extenstionName = remainingFileName.rpartition('.')
549
+ if delimiter and delimiter == '\t' and not extenstionName == 'tsv':
507
550
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
508
- elif delimiter and delimiter == ',' and not fileName.endswith('.csv'):
551
+ elif delimiter and delimiter == ',' and not extenstionName == 'csv':
509
552
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .csv','warning',teeLogger=teeLogger)
510
- elif delimiter and delimiter == '\0' and not fileName.endswith('.nsv'):
553
+ elif delimiter and delimiter == '\0' and not extenstionName == 'nsv':
511
554
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .nsv','warning',teeLogger=teeLogger)
512
- elif delimiter and delimiter == '|' and not fileName.endswith('.psv'):
555
+ elif delimiter and delimiter == '|' and not extenstionName == 'psv':
513
556
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .psv','warning',teeLogger=teeLogger)
514
557
  if not os.path.isfile(fileName):
515
558
  if createIfNotExist:
516
559
  try:
517
- with open(fileName, mode ='w',encoding=encoding)as file:
518
- file.write(header+'\n')
560
+ with openFileAsCompressed(fileName, mode ='wb',encoding=encoding,teeLogger=teeLogger)as file:
561
+ file.write(header.encode(encoding=encoding,errors='replace')+b'\n')
519
562
  __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
520
563
  return True
521
564
  except:
@@ -591,10 +634,10 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
591
634
  header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
592
635
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
593
636
  return taskDic
594
- with open(fileName, mode ='rb')as file:
637
+ with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
595
638
  correctColumnNum = -1
596
639
  if header.rstrip() and verifyHeader:
597
- line = file.readline().decode(encoding=encoding)
640
+ line = file.readline().decode(encoding=encoding,errors='replace')
598
641
  if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
599
642
  correctColumnNum = len(header.split(delimiter))
600
643
  if verbose:
@@ -605,7 +648,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
605
648
  taskDic[lineCache[0]] = lineCache
606
649
  return lineCache
607
650
  for line in file:
608
- correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
651
+ correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding,errors='replace'),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
609
652
  return taskDic
610
653
 
611
654
  def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
@@ -693,25 +736,27 @@ def appendLinesTabularFile(fileName,linesToAppend,teeLogger = None,header = '',c
693
736
  if verbose:
694
737
  __teePrintOrNot(f"No lines to append to {fileName}",teeLogger=teeLogger)
695
738
  return
696
- with open(fileName, mode ='r+b')as file:
697
- correctColumnNum = max([len(line) for line in formatedLines])
698
- if header.rstrip() and verifyHeader:
699
- line = file.readline().decode(encoding=encoding)
700
- if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
701
- correctColumnNum = len(header.split(delimiter))
702
- if verbose:
703
- __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
704
- # truncate / fill the lines to the correct number of columns
705
- for i in range(len(formatedLines)):
706
- if len(formatedLines[i]) < correctColumnNum:
707
- formatedLines[i] += ['']*(correctColumnNum-len(formatedLines[i]))
708
- elif len(formatedLines[i]) > correctColumnNum:
709
- formatedLines[i] = formatedLines[i][:correctColumnNum]
739
+ correctColumnNum = max([len(line) for line in formatedLines])
740
+
741
+ if header.rstrip() and verifyHeader:
742
+ with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
743
+ line = file.readline().decode(encoding=encoding,errors='replace')
744
+ if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
745
+ correctColumnNum = len(header.split(delimiter))
746
+ if verbose:
747
+ __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
748
+ # truncate / fill the lines to the correct number of columns
749
+ for i in range(len(formatedLines)):
750
+ if len(formatedLines[i]) < correctColumnNum:
751
+ formatedLines[i] += ['']*(correctColumnNum-len(formatedLines[i]))
752
+ elif len(formatedLines[i]) > correctColumnNum:
753
+ formatedLines[i] = formatedLines[i][:correctColumnNum]
754
+ with openFileAsCompressed(fileName, mode ='ab',encoding=encoding,teeLogger=teeLogger)as file:
710
755
  # check if the file ends in a newline
711
- file.seek(-1, os.SEEK_END)
712
- if file.read(1) != b'\n':
713
- file.write(b'\n')
714
- file.write(b'\n'.join([delimiter.join(line).encode(encoding=encoding) for line in formatedLines]) + b'\n')
756
+ # file.seek(-1, os.SEEK_END)
757
+ # if file.read(1) != b'\n':
758
+ # file.write(b'\n')
759
+ file.write(b'\n'.join([delimiter.join(line).encode(encoding=encoding,errors='replace') for line in formatedLines]) + b'\n')
715
760
  if verbose:
716
761
  __teePrintOrNot(f"Appended {len(formatedLines)} lines to {fileName}",teeLogger=teeLogger)
717
762
 
@@ -747,14 +792,17 @@ def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,
747
792
  if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
748
793
  raise FileNotFoundError("Something catastrophic happened! File still not found after creation")
749
794
  else:
750
- with open(fileName, mode ='r+',encoding=encoding)as file:
795
+ with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
751
796
  if header.rstrip() and verifyHeader:
752
- line = file.readline()
797
+ line = file.readline().decode(encoding=encoding,errors='replace')
753
798
  if not _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
754
799
  __teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
755
- file.truncate()
756
- else:
757
- file.write(header+'\n')
800
+ header = line
801
+ with openFileAsCompressed(fileName, mode ='wb',encoding=encoding,teeLogger=teeLogger)as file:
802
+ if header:
803
+ if not header.endswith('\n'):
804
+ header += '\n'
805
+ file.write(header.encode(encoding=encoding,errors='replace'))
758
806
  if verbose:
759
807
  __teePrintOrNot(f"Cleared {fileName}",teeLogger=teeLogger)
760
808
 
@@ -774,7 +822,69 @@ def get_time_ns():
774
822
  except:
775
823
  # try to get the time in nanoseconds
776
824
  return int(time.time()*1e9)
825
+
826
def scrubTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = '\t',defaults = ...):
    """
    Compatibility entry point that forwards every argument unchanged to scrubTabularFile.

    Parameters:
    - fileName (str): The path to the Tabular file.
    - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
    - header (str or list, optional): The header of the Tabular file. Defaults to ''.
    - createIfNotExist (bool, optional): Forwarded as createIfNotExist. Defaults to False.
    - lastLineOnly (bool, optional): Forwarded as lastLineOnly. Defaults to False.
    - verifyHeader (bool, optional): Forwarded as verifyHeader. Defaults to True.
    - verbose (bool, optional): Whether to print verbose output. Defaults to False.
    - taskDic (optional): Forwarded as taskDic. Defaults to None.
    - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
    - strict (bool, optional): Forwarded as strict. Defaults to False.
    - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to a tab character.
    - defaults (optional): Forwarded as defaults. Defaults to Ellipsis (the "not supplied" marker).

    Returns:
    - Whatever scrubTabularFile returns for the same arguments.
    """
    forwardedOptions = dict(
        teeLogger = teeLogger,
        header = header,
        createIfNotExist = createIfNotExist,
        lastLineOnly = lastLineOnly,
        verifyHeader = verifyHeader,
        verbose = verbose,
        taskDic = taskDic,
        encoding = encoding,
        strict = strict,
        delimiter = delimiter,
        defaults = defaults,
    )
    return scrubTabularFile(fileName, **forwardedOptions)
854
+
855
def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...):
    """
    Scrub a Tabular (CSV / TSV / NSV) file: read it, clear it, then write the read contents back.

    For compressed files this rewrites the whole file in one pass, which may improve the
    compression ratio and reduce the file size.

    Parameters:
    - fileName (str): The path to the Tabular file.
    - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
    - header (str or list, optional): The header of the Tabular file. Defaults to ''.
    - createIfNotExist (bool, optional): Passed to the read and append steps. Defaults to False.
    - lastLineOnly (bool, optional): Passed to the read step. Defaults to False.
    - verifyHeader (bool, optional): Passed to the read, clear and append steps. Defaults to True.
    - verbose (bool, optional): Whether to print verbose output. Defaults to False.
    - taskDic (optional): Passed to the read step. Defaults to None.
    - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
    - strict (bool, optional): Passed to the read, clear and append steps. Defaults to False.
    - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to Ellipsis (the "not supplied" marker).
    - defaults (optional): Passed to the read step. Defaults to Ellipsis (the "not supplied" marker).

    Returns:
    - The value returned by readTabularFile; the file is only rewritten when that value is truthy.
    """
    # NOTE(review): with lastLineOnly=True only the last line appears to be read and written back,
    # which would discard the rest of the file - confirm this is intended.
    contents = readTabularFile(
        fileName,
        teeLogger = teeLogger,
        header = header,
        createIfNotExist = createIfNotExist,
        lastLineOnly = lastLineOnly,
        verifyHeader = verifyHeader,
        verbose = verbose,
        taskDic = taskDic,
        encoding = encoding,
        strict = strict,
        delimiter = delimiter,
        defaults = defaults,
    )
    # nothing was read: leave the file untouched
    if not contents:
        return contents
    clearTabularFile(
        fileName,
        teeLogger = teeLogger,
        header = header,
        verifyHeader = verifyHeader,
        verbose = verbose,
        encoding = encoding,
        strict = strict,
        delimiter = delimiter,
    )
    appendLinesTabularFile(
        fileName,
        contents,
        teeLogger = teeLogger,
        header = header,
        createIfNotExist = createIfNotExist,
        verifyHeader = verifyHeader,
        verbose = verbose,
        encoding = encoding,
        strict = strict,
        delimiter = delimiter,
    )
    return contents
887
+
778
888
  # create a tsv class that functions like a ordered dictionary but will update the file when modified
779
889
  class TSVZed(OrderedDict):
780
890
  def __teePrintOrNot(self,message,level = 'info'):
@@ -1010,14 +1120,14 @@ class TSVZed(OrderedDict):
1010
1120
  def clear_file(self):
1011
1121
  try:
1012
1122
  if self.header:
1013
- file = self.get_file_obj('w')
1014
- file.write(self.header+'\n')
1123
+ file = self.get_file_obj('wb')
1124
+ file.write(self.header.encode(self.encoding,errors='replace') + b'\n')
1015
1125
  self.release_file_obj(file)
1016
1126
  if self.verbose:
1017
1127
  self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
1018
1128
  self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
1019
1129
  else:
1020
- file = self.get_file_obj('w')
1130
+ file = self.get_file_obj('wb')
1021
1131
  self.release_file_obj(file)
1022
1132
  if self.verbose:
1023
1133
  self.__teePrintOrNot(f"File {self._fileName} cleared empty")
@@ -1153,15 +1263,15 @@ memoryOnly:{self.memoryOnly}
1153
1263
  self.deSynced = True
1154
1264
  return False
1155
1265
 
1156
- def oldMapToFile(self):
1266
+ def hardMapToFile(self):
1157
1267
  try:
1158
1268
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
1159
1269
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
1160
- file = self.get_file_obj('w')
1270
+ file = self.get_file_obj('wb')
1161
1271
  if self.header:
1162
- file.write(self.header+'\n')
1272
+ file.write(self.header.encode(self.encoding,errors='replace') + b'\n')
1163
1273
  for key in self:
1164
- file.write(self.delimiter.join(self[key])+'\n')
1274
+ file.write(self.delimiter.join(self[key]).encode(encoding=self.encoding,errors='replace')+b'\n')
1165
1275
  self.release_file_obj(file)
1166
1276
  if self.verbose:
1167
1277
  self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
@@ -1170,7 +1280,7 @@ memoryOnly:{self.memoryOnly}
1170
1280
  self.deSynced = False
1171
1281
  except Exception as e:
1172
1282
  self.release_file_obj(file)
1173
- self.__teePrintOrNot(f"Failed to write at oldMapToFile() to {self._fileName}: {e}",'error')
1283
+ self.__teePrintOrNot(f"Failed to write at hardMapToFile() to {self._fileName}: {e}",'error')
1174
1284
  import traceback
1175
1285
  self.__teePrintOrNot(traceback.format_exc(),'error')
1176
1286
  self.deSynced = True
@@ -1182,14 +1292,17 @@ memoryOnly:{self.memoryOnly}
1182
1292
  try:
1183
1293
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
1184
1294
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
1295
+ if self._fileName.rpartition('.')[2] in COMPRESSED_FILE_EXTENSIONS:
1296
+ # if the file is compressed, we need to use the hardMapToFile method
1297
+ return self.hardMapToFile()
1185
1298
  file = self.get_file_obj('r+b')
1186
1299
  overWrite = False
1187
1300
  if self.header:
1188
- line = file.readline().decode(self.encoding)
1301
+ line = file.readline().decode(self.encoding,errors='replace')
1189
1302
  aftPos = file.tell()
1190
1303
  if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
1191
1304
  file.seek(0)
1192
- file.write(f'{self.header}\n'.encode(encoding=self.encoding))
1305
+ file.write(f'{self.header}\n'.encode(encoding=self.encoding,errors='replace'))
1193
1306
  # if the header is not the same length as the line, we need to overwrite the file
1194
1307
  if aftPos != file.tell():
1195
1308
  overWrite = True
@@ -1202,7 +1315,7 @@ memoryOnly:{self.memoryOnly}
1202
1315
  if overWrite:
1203
1316
  if self.verbose:
1204
1317
  self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
1205
- file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
1318
+ file.write(strToWrite.encode(encoding=self.encoding,errors='replace')+b'\n')
1206
1319
  continue
1207
1320
  pos = file.tell()
1208
1321
  line = file.readline()
@@ -1210,10 +1323,10 @@ memoryOnly:{self.memoryOnly}
1210
1323
  if not line or pos == aftPos:
1211
1324
  if self.verbose:
1212
1325
  self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
1213
- file.write(strToWrite.encode(encoding=self.encoding))
1326
+ file.write(strToWrite.encode(encoding=self.encoding,errors='replace'))
1214
1327
  overWrite = True
1215
1328
  continue
1216
- strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
1329
+ strToWrite = strToWrite.encode(encoding=self.encoding,errors='replace').ljust(len(line)-1)+b'\n'
1217
1330
  if line != strToWrite:
1218
1331
  if self.verbose:
1219
1332
  self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
@@ -1236,6 +1349,8 @@ memoryOnly:{self.memoryOnly}
1236
1349
  import traceback
1237
1350
  self.__teePrintOrNot(traceback.format_exc(),'error')
1238
1351
  self.deSynced = True
1352
+ self.__teePrintOrNot("Trying failback hardMapToFile()")
1353
+ self.hardMapToFile()
1239
1354
  self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
1240
1355
  self.monitor_external_changes = mec
1241
1356
  return self
@@ -1278,10 +1393,10 @@ memoryOnly:{self.memoryOnly}
1278
1393
  if self.verbose:
1279
1394
  self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
1280
1395
  self.__teePrintOrNot(f"Before size of {self._fileName}: {os.path.getsize(self._fileName)}")
1281
- file = self.get_file_obj('a')
1396
+ file = self.get_file_obj('ab')
1282
1397
  while self.appendQueue:
1283
1398
  line = self.appendQueue.popleft()
1284
- file.write(line+'\n')
1399
+ file.write(line.encode(encoding=self.encoding,errors='replace')+b'\n')
1285
1400
  self.release_file_obj(file)
1286
1401
  if self.verbose:
1287
1402
  self.__teePrintOrNot(f"Records commited to {self._fileName}")
@@ -1306,15 +1421,12 @@ memoryOnly:{self.memoryOnly}
1306
1421
  if self.verbose:
1307
1422
  self.__teePrintOrNot(f"Append thread for {self._fileName} stopped")
1308
1423
 
1309
- def get_file_obj(self,modes = 'a'):
1424
+ def get_file_obj(self,modes = 'ab'):
1310
1425
  self.writeLock.acquire()
1311
1426
  try:
1312
- if 'b' not in modes:
1313
- if not self.encoding:
1314
- self.encoding = 'utf8'
1315
- file = open(self._fileName, mode=modes, encoding=self.encoding)
1316
- else:
1317
- file = open(self._fileName, mode=modes)
1427
+ if not self.encoding:
1428
+ self.encoding = 'utf8'
1429
+ file = openFileAsCompressed(self._fileName, mode=modes, encoding=self.encoding,teeLogger=self.teeLogger)
1318
1430
  # Lock the file after opening
1319
1431
  if os.name == 'posix':
1320
1432
  fcntl.lockf(file, fcntl.LOCK_EX)
@@ -1375,7 +1487,7 @@ def __main__():
1375
1487
  import argparse
1376
1488
  parser = argparse.ArgumentParser(description='TSVZed: A TSV / CSV / NSV file manager')
1377
1489
  parser.add_argument('filename', type=str, help='The file to read')
1378
- parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
1490
+ parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear','scrub'], help='The operation to perform. Note: scrub will also remove all comments. Default: read', default='read')
1379
1491
  parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
1380
1492
  parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
1381
1493
  parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
@@ -1384,7 +1496,7 @@ def __main__():
1384
1496
  strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
1385
1497
  strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
1386
1498
  parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
1387
- parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
1499
+ parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} @ {COMMIT_DATE} by {author}')
1388
1500
  args = parser.parse_args()
1389
1501
  args.delimiter = get_delimiter(delimiter=args.delimiter,file_name=args.filename)
1390
1502
  if args.header and args.header.endswith('\\'):
@@ -1416,6 +1528,8 @@ def __main__():
1416
1528
  appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1417
1529
  elif args.operation == 'clear':
1418
1530
  clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
1531
+ elif args.operation == 'scrub':
1532
+ scrubTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
1419
1533
  else:
1420
1534
  print("Invalid operation")
1421
1535
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TSVZ
3
- Version: 3.25
3
+ Version: 3.27
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -0,0 +1,6 @@
1
+ TSVZ.py,sha256=G4SZbuw8WNALDqGegzrI7eEkgyKv7tyYDCa06Ux7lS0,77440
2
+ tsvz-3.27.dist-info/METADATA,sha256=9pwd-LcsbXgET3ZRZgRaqatgL6kCsKgGh5zodE1JACo,1826
3
+ tsvz-3.27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
4
+ tsvz-3.27.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
+ tsvz-3.27.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
+ tsvz-3.27.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +0,0 @@
1
- TSVZ.py,sha256=LGbNbhS3BS9AH1AD9UQCyMk-f-iAgfBk7CXUdRr5Vy4,69461
2
- tsvz-3.25.dist-info/METADATA,sha256=8ArDrlBsAE26X80qLBeZ9gVJp8HFlFzd2o4EzhMTPUI,1826
3
- tsvz-3.25.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
4
- tsvz-3.25.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
- tsvz-3.25.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
- tsvz-3.25.dist-info/RECORD,,