TSVZ 3.25__py3-none-any.whl → 3.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
TSVZ.py CHANGED
@@ -22,13 +22,16 @@ if os.name == 'nt':
22
22
  elif os.name == 'posix':
23
23
  import fcntl
24
24
 
25
- version = '3.25'
25
+ version = '3.26'
26
26
  __version__ = version
27
27
  author = 'pan@zopyr.us'
28
+ COMMIT_DATE = '2025-05-19'
28
29
 
29
30
  DEFAULT_DELIMITER = '\t'
30
31
  DEFAULTS_INDICATOR_KEY = '#_defaults_#'
31
32
 
33
+ COMPRESSED_FILE_EXTENSIONS = ['gz','gzip','bz2','bzip2','xz','lzma']
34
+
32
35
  def get_delimiter(delimiter,file_name = ''):
33
36
  global DEFAULT_DELIMITER
34
37
  if not delimiter:
@@ -57,6 +60,43 @@ def get_delimiter(delimiter,file_name = ''):
57
60
  DEFAULT_DELIMITER = rtn
58
61
  return rtn
59
62
 
63
+ def openFileAsCompressed(fileName,mode = 'rb',encoding = 'utf8',teeLogger = None,compressLevel = 1):
64
+ if 'b' not in mode:
65
+ mode += 't'
66
+ kwargs = {}
67
+ if 'r' not in mode:
68
+ if fileName.endswith('.xz'):
69
+ kwargs['preset'] = compressLevel
70
+ else:
71
+ kwargs['compresslevel'] = compressLevel
72
+ if 'b' not in mode:
73
+ kwargs['encoding'] = encoding
74
+ if fileName.endswith('.xz') or fileName.endswith('.lzma'):
75
+ try:
76
+ import lzma
77
+ return lzma.open(fileName, mode, **kwargs)
78
+ except:
79
+ __teePrintOrNot(f"Failed to open {fileName} with lzma, trying bin",teeLogger=teeLogger)
80
+ elif fileName.endswith('.gz') or fileName.endswith('.gzip'):
81
+ try:
82
+ import gzip
83
+ return gzip.open(fileName, mode, **kwargs)
84
+ except:
85
+ __teePrintOrNot(f"Failed to open {fileName} with gzip, trying bin",teeLogger=teeLogger)
86
+ elif fileName.endswith('.bz2') or fileName.endswith('.bzip2'):
87
+ try:
88
+ import bz2
89
+ return bz2.open(fileName, mode, **kwargs)
90
+ except:
91
+ __teePrintOrNot(f"Failed to open {fileName} with bz2, trying bin",teeLogger=teeLogger)
92
+ if 't' in mode:
93
+ mode = mode.replace('t','')
94
+ return open(fileName, mode, encoding=encoding)
95
+ if 'b' not in mode:
96
+ mode += 'b'
97
+ return open(fileName, mode)
98
+
99
+
60
100
  def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
61
101
  version = 1.11
62
102
  _ = version
@@ -392,7 +432,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
392
432
  delimiter = get_delimiter(delimiter,file_name=fileName)
393
433
  if verbose:
394
434
  __teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
395
- with open(fileName, 'rb') as file:
435
+ with openFileAsCompressed(fileName, 'rb',encoding=encoding, teeLogger=teeLogger) as file:
396
436
  file.seek(0, os.SEEK_END)
397
437
  file_size = file.tell()
398
438
  buffer = b''
@@ -416,7 +456,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
416
456
  if lines[i].strip(): # Skip empty lines
417
457
  # Process the line
418
458
  correctColumnNum, lineCache = _processLine(
419
- line=lines[i].decode(encoding=encoding),
459
+ line=lines[i].decode(encoding=encoding,errors='replace'),
420
460
  taskDic=taskDic,
421
461
  correctColumnNum=correctColumnNum,
422
462
  verbose=verbose,
@@ -503,19 +543,22 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
503
543
  Returns:
504
544
  bool: True if the file exists, False otherwise.
505
545
  """
506
- if delimiter and delimiter == '\t' and not fileName.endswith('.tsv'):
546
+ remainingFileName, _ ,extenstionName = fileName.rpartition('.')
547
+ if extenstionName in COMPRESSED_FILE_EXTENSIONS:
548
+ remainingFileName, _ ,extenstionName = remainingFileName.rpartition('.')
549
+ if delimiter and delimiter == '\t' and not extenstionName == 'tsv':
507
550
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
508
- elif delimiter and delimiter == ',' and not fileName.endswith('.csv'):
551
+ elif delimiter and delimiter == ',' and not extenstionName == 'csv':
509
552
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .csv','warning',teeLogger=teeLogger)
510
- elif delimiter and delimiter == '\0' and not fileName.endswith('.nsv'):
553
+ elif delimiter and delimiter == '\0' and not extenstionName == 'nsv':
511
554
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .nsv','warning',teeLogger=teeLogger)
512
- elif delimiter and delimiter == '|' and not fileName.endswith('.psv'):
555
+ elif delimiter and delimiter == '|' and not extenstionName == 'psv':
513
556
  __teePrintOrNot(f'Warning: Filename {fileName} does not end with .psv','warning',teeLogger=teeLogger)
514
557
  if not os.path.isfile(fileName):
515
558
  if createIfNotExist:
516
559
  try:
517
- with open(fileName, mode ='w',encoding=encoding)as file:
518
- file.write(header+'\n')
560
+ with openFileAsCompressed(fileName, mode ='wb',encoding=encoding,teeLogger=teeLogger)as file:
561
+ file.write(header.encode(encoding=encoding,errors='replace')+b'\n')
519
562
  __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
520
563
  return True
521
564
  except:
@@ -591,10 +634,10 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
591
634
  header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
592
635
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
593
636
  return taskDic
594
- with open(fileName, mode ='rb')as file:
637
+ with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
595
638
  correctColumnNum = -1
596
639
  if header.rstrip() and verifyHeader:
597
- line = file.readline().decode(encoding=encoding)
640
+ line = file.readline().decode(encoding=encoding,errors='replace')
598
641
  if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
599
642
  correctColumnNum = len(header.split(delimiter))
600
643
  if verbose:
@@ -605,7 +648,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
605
648
  taskDic[lineCache[0]] = lineCache
606
649
  return lineCache
607
650
  for line in file:
608
- correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
651
+ correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding,errors='replace'),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
609
652
  return taskDic
610
653
 
611
654
  def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
@@ -693,10 +736,10 @@ def appendLinesTabularFile(fileName,linesToAppend,teeLogger = None,header = '',c
693
736
  if verbose:
694
737
  __teePrintOrNot(f"No lines to append to {fileName}",teeLogger=teeLogger)
695
738
  return
696
- with open(fileName, mode ='r+b')as file:
739
+ with openFileAsCompressed(fileName, mode ='ab',encoding=encoding,teeLogger=teeLogger)as file:
697
740
  correctColumnNum = max([len(line) for line in formatedLines])
698
741
  if header.rstrip() and verifyHeader:
699
- line = file.readline().decode(encoding=encoding)
742
+ line = file.readline().decode(encoding=encoding,errors='replace')
700
743
  if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
701
744
  correctColumnNum = len(header.split(delimiter))
702
745
  if verbose:
@@ -708,10 +751,10 @@ def appendLinesTabularFile(fileName,linesToAppend,teeLogger = None,header = '',c
708
751
  elif len(formatedLines[i]) > correctColumnNum:
709
752
  formatedLines[i] = formatedLines[i][:correctColumnNum]
710
753
  # check if the file ends in a newline
711
- file.seek(-1, os.SEEK_END)
712
- if file.read(1) != b'\n':
713
- file.write(b'\n')
714
- file.write(b'\n'.join([delimiter.join(line).encode(encoding=encoding) for line in formatedLines]) + b'\n')
754
+ # file.seek(-1, os.SEEK_END)
755
+ # if file.read(1) != b'\n':
756
+ # file.write(b'\n')
757
+ file.write(b'\n'.join([delimiter.join(line).encode(encoding=encoding,errors='replace') for line in formatedLines]) + b'\n')
715
758
  if verbose:
716
759
  __teePrintOrNot(f"Appended {len(formatedLines)} lines to {fileName}",teeLogger=teeLogger)
717
760
 
@@ -747,14 +790,17 @@ def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,
747
790
  if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
748
791
  raise FileNotFoundError("Something catastrophic happened! File still not found after creation")
749
792
  else:
750
- with open(fileName, mode ='r+',encoding=encoding)as file:
793
+ with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
751
794
  if header.rstrip() and verifyHeader:
752
- line = file.readline()
795
+ line = file.readline().decode(encoding=encoding,errors='replace')
753
796
  if not _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
754
797
  __teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
755
- file.truncate()
756
- else:
757
- file.write(header+'\n')
798
+ header = line
799
+ with openFileAsCompressed(fileName, mode ='wb',encoding=encoding,teeLogger=teeLogger)as file:
800
+ if header:
801
+ if not header.endswith('\n'):
802
+ header += '\n'
803
+ file.write(header.encode(encoding=encoding,errors='replace'))
758
804
  if verbose:
759
805
  __teePrintOrNot(f"Cleared {fileName}",teeLogger=teeLogger)
760
806
 
@@ -774,7 +820,69 @@ def get_time_ns():
774
820
  except:
775
821
  # try to get the time in nanoseconds
776
822
  return int(time.time()*1e9)
823
+
824
+ def scrubTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = '\t',defaults = ...):
825
+ """
826
+ Compatibility method, calls scrubTabularFile.
827
+ Scrub a Tabular (CSV / TSV / NSV) file by reading it and writing the contents back into the file.
828
+ Return the data as a dictionary.
829
+
830
+ Parameters:
831
+ - fileName (str): The path to the Tabular file.
832
+ - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
833
+ - header (str or list, optional): The header of the Tabular file. If a string, it should be a tab-separated list of column names. If a list, it should contain the column names. Defaults to ''.
834
+ - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
835
+ - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
836
+ - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
837
+ - verbose (bool, optional): Whether to print verbose output. Defaults to False.
838
+ - taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
839
+ - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
840
+ - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
841
+ - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
842
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
843
+
844
+ Returns:
845
+ - OrderedDict: The dictionary containing the data from the Tabular file.
846
+
847
+ Raises:
848
+ - Exception: If the file is not found or there is a data format error.
849
+
850
+ """
851
+ return scrubTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
852
+
853
+ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...):
854
+ """
855
+ Scrub a Tabular (CSV / TSV / NSV) file by reading it and writing the contents back into the file.
856
+ If using compressed files. This will recompress the file in whole and possibily increase the compression ratio reducing the file size.
857
+ Return the data as a dictionary.
777
858
 
859
+ Parameters:
860
+ - fileName (str): The path to the Tabular file.
861
+ - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
862
+ - header (str or list, optional): The header of the Tabular file. If a string, it should be a tab-separated list of column names. If a list, it should contain the column names. Defaults to ''.
863
+ - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
864
+ - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
865
+ - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
866
+ - verbose (bool, optional): Whether to print verbose output. Defaults to False.
867
+ - taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
868
+ - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
869
+ - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
870
+ - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
871
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
872
+
873
+ Returns:
874
+ - OrderedDict: The dictionary containing the data from the Tabular file.
875
+
876
+ Raises:
877
+ - Exception: If the file is not found or there is a data format error.
878
+
879
+ """
880
+ file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
881
+ if file:
882
+ clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
883
+ appendLinesTabularFile(fileName,file,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
884
+ return file
885
+
778
886
  # create a tsv class that functions like a ordered dictionary but will update the file when modified
779
887
  class TSVZed(OrderedDict):
780
888
  def __teePrintOrNot(self,message,level = 'info'):
@@ -1010,14 +1118,14 @@ class TSVZed(OrderedDict):
1010
1118
  def clear_file(self):
1011
1119
  try:
1012
1120
  if self.header:
1013
- file = self.get_file_obj('w')
1014
- file.write(self.header+'\n')
1121
+ file = self.get_file_obj('wb')
1122
+ file.write(self.header.encode(self.encoding,errors='replace') + b'\n')
1015
1123
  self.release_file_obj(file)
1016
1124
  if self.verbose:
1017
1125
  self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
1018
1126
  self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
1019
1127
  else:
1020
- file = self.get_file_obj('w')
1128
+ file = self.get_file_obj('wb')
1021
1129
  self.release_file_obj(file)
1022
1130
  if self.verbose:
1023
1131
  self.__teePrintOrNot(f"File {self._fileName} cleared empty")
@@ -1153,15 +1261,15 @@ memoryOnly:{self.memoryOnly}
1153
1261
  self.deSynced = True
1154
1262
  return False
1155
1263
 
1156
- def oldMapToFile(self):
1264
+ def hardMapToFile(self):
1157
1265
  try:
1158
1266
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
1159
1267
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
1160
- file = self.get_file_obj('w')
1268
+ file = self.get_file_obj('wb')
1161
1269
  if self.header:
1162
- file.write(self.header+'\n')
1270
+ file.write(self.header.encode(self.encoding,errors='replace') + b'\n')
1163
1271
  for key in self:
1164
- file.write(self.delimiter.join(self[key])+'\n')
1272
+ file.write(self.delimiter.join(self[key]).encode(encoding=self.encoding,errors='replace')+b'\n')
1165
1273
  self.release_file_obj(file)
1166
1274
  if self.verbose:
1167
1275
  self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
@@ -1170,7 +1278,7 @@ memoryOnly:{self.memoryOnly}
1170
1278
  self.deSynced = False
1171
1279
  except Exception as e:
1172
1280
  self.release_file_obj(file)
1173
- self.__teePrintOrNot(f"Failed to write at oldMapToFile() to {self._fileName}: {e}",'error')
1281
+ self.__teePrintOrNot(f"Failed to write at hardMapToFile() to {self._fileName}: {e}",'error')
1174
1282
  import traceback
1175
1283
  self.__teePrintOrNot(traceback.format_exc(),'error')
1176
1284
  self.deSynced = True
@@ -1182,14 +1290,17 @@ memoryOnly:{self.memoryOnly}
1182
1290
  try:
1183
1291
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
1184
1292
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
1293
+ if self._fileName.rpartition('.')[2] in COMPRESSED_FILE_EXTENSIONS:
1294
+ # if the file is compressed, we need to use the hardMapToFile method
1295
+ return self.hardMapToFile()
1185
1296
  file = self.get_file_obj('r+b')
1186
1297
  overWrite = False
1187
1298
  if self.header:
1188
- line = file.readline().decode(self.encoding)
1299
+ line = file.readline().decode(self.encoding,errors='replace')
1189
1300
  aftPos = file.tell()
1190
1301
  if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
1191
1302
  file.seek(0)
1192
- file.write(f'{self.header}\n'.encode(encoding=self.encoding))
1303
+ file.write(f'{self.header}\n'.encode(encoding=self.encoding,errors='replace'))
1193
1304
  # if the header is not the same length as the line, we need to overwrite the file
1194
1305
  if aftPos != file.tell():
1195
1306
  overWrite = True
@@ -1202,7 +1313,7 @@ memoryOnly:{self.memoryOnly}
1202
1313
  if overWrite:
1203
1314
  if self.verbose:
1204
1315
  self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
1205
- file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
1316
+ file.write(strToWrite.encode(encoding=self.encoding,errors='replace')+b'\n')
1206
1317
  continue
1207
1318
  pos = file.tell()
1208
1319
  line = file.readline()
@@ -1210,10 +1321,10 @@ memoryOnly:{self.memoryOnly}
1210
1321
  if not line or pos == aftPos:
1211
1322
  if self.verbose:
1212
1323
  self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
1213
- file.write(strToWrite.encode(encoding=self.encoding))
1324
+ file.write(strToWrite.encode(encoding=self.encoding,errors='replace'))
1214
1325
  overWrite = True
1215
1326
  continue
1216
- strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
1327
+ strToWrite = strToWrite.encode(encoding=self.encoding,errors='replace').ljust(len(line)-1)+b'\n'
1217
1328
  if line != strToWrite:
1218
1329
  if self.verbose:
1219
1330
  self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
@@ -1236,6 +1347,8 @@ memoryOnly:{self.memoryOnly}
1236
1347
  import traceback
1237
1348
  self.__teePrintOrNot(traceback.format_exc(),'error')
1238
1349
  self.deSynced = True
1350
+ self.__teePrintOrNot("Trying failback hardMapToFile()")
1351
+ self.hardMapToFile()
1239
1352
  self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
1240
1353
  self.monitor_external_changes = mec
1241
1354
  return self
@@ -1278,10 +1391,10 @@ memoryOnly:{self.memoryOnly}
1278
1391
  if self.verbose:
1279
1392
  self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
1280
1393
  self.__teePrintOrNot(f"Before size of {self._fileName}: {os.path.getsize(self._fileName)}")
1281
- file = self.get_file_obj('a')
1394
+ file = self.get_file_obj('ab')
1282
1395
  while self.appendQueue:
1283
1396
  line = self.appendQueue.popleft()
1284
- file.write(line+'\n')
1397
+ file.write(line.encode(encoding=self.encoding,errors='replace')+b'\n')
1285
1398
  self.release_file_obj(file)
1286
1399
  if self.verbose:
1287
1400
  self.__teePrintOrNot(f"Records commited to {self._fileName}")
@@ -1306,15 +1419,12 @@ memoryOnly:{self.memoryOnly}
1306
1419
  if self.verbose:
1307
1420
  self.__teePrintOrNot(f"Append thread for {self._fileName} stopped")
1308
1421
 
1309
- def get_file_obj(self,modes = 'a'):
1422
+ def get_file_obj(self,modes = 'ab'):
1310
1423
  self.writeLock.acquire()
1311
1424
  try:
1312
- if 'b' not in modes:
1313
- if not self.encoding:
1314
- self.encoding = 'utf8'
1315
- file = open(self._fileName, mode=modes, encoding=self.encoding)
1316
- else:
1317
- file = open(self._fileName, mode=modes)
1425
+ if not self.encoding:
1426
+ self.encoding = 'utf8'
1427
+ file = openFileAsCompressed(self._fileName, mode=modes, encoding=self.encoding,teeLogger=self.teeLogger)
1318
1428
  # Lock the file after opening
1319
1429
  if os.name == 'posix':
1320
1430
  fcntl.lockf(file, fcntl.LOCK_EX)
@@ -1375,7 +1485,7 @@ def __main__():
1375
1485
  import argparse
1376
1486
  parser = argparse.ArgumentParser(description='TSVZed: A TSV / CSV / NSV file manager')
1377
1487
  parser.add_argument('filename', type=str, help='The file to read')
1378
- parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
1488
+ parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear','scrub'], help='The operation to perform. Note: scrub will also remove all comments. Default: read', default='read')
1379
1489
  parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
1380
1490
  parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
1381
1491
  parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
@@ -1384,7 +1494,7 @@ def __main__():
1384
1494
  strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
1385
1495
  strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
1386
1496
  parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
1387
- parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
1497
+ parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} @ {COMMIT_DATE} by {author}')
1388
1498
  args = parser.parse_args()
1389
1499
  args.delimiter = get_delimiter(delimiter=args.delimiter,file_name=args.filename)
1390
1500
  if args.header and args.header.endswith('\\'):
@@ -1416,6 +1526,8 @@ def __main__():
1416
1526
  appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1417
1527
  elif args.operation == 'clear':
1418
1528
  clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
1529
+ elif args.operation == 'scrub':
1530
+ scrubTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
1419
1531
  else:
1420
1532
  print("Invalid operation")
1421
1533
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TSVZ
3
- Version: 3.25
3
+ Version: 3.26
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -0,0 +1,6 @@
1
+ TSVZ.py,sha256=WanY7DemCKyfMB4qOiAFkYj_95AaeqQ4R6x02UTg89Q,77385
2
+ tsvz-3.26.dist-info/METADATA,sha256=hfHZtBL5SxPxkPvar3SWXLrA9Vps5HqFPNxhnqSAh2k,1826
3
+ tsvz-3.26.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
4
+ tsvz-3.26.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
+ tsvz-3.26.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
+ tsvz-3.26.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +0,0 @@
1
- TSVZ.py,sha256=LGbNbhS3BS9AH1AD9UQCyMk-f-iAgfBk7CXUdRr5Vy4,69461
2
- tsvz-3.25.dist-info/METADATA,sha256=8ArDrlBsAE26X80qLBeZ9gVJp8HFlFzd2o4EzhMTPUI,1826
3
- tsvz-3.25.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
4
- tsvz-3.25.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
- tsvz-3.25.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
- tsvz-3.25.dist-info/RECORD,,