TSVZ 3.17__tar.gz → 3.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsvz-3.17 → tsvz-3.20}/PKG-INFO +1 -1
- {tsvz-3.17 → tsvz-3.20}/TSVZ.egg-info/PKG-INFO +1 -1
- {tsvz-3.17 → tsvz-3.20}/TSVZ.py +43 -36
- {tsvz-3.17 → tsvz-3.20}/README.md +0 -0
- {tsvz-3.17 → tsvz-3.20}/TSVZ.egg-info/SOURCES.txt +0 -0
- {tsvz-3.17 → tsvz-3.20}/TSVZ.egg-info/dependency_links.txt +0 -0
- {tsvz-3.17 → tsvz-3.20}/TSVZ.egg-info/entry_points.txt +0 -0
- {tsvz-3.17 → tsvz-3.20}/TSVZ.egg-info/top_level.txt +0 -0
- {tsvz-3.17 → tsvz-3.20}/setup.cfg +0 -0
- {tsvz-3.17 → tsvz-3.20}/setup.py +0 -0
{tsvz-3.17 → tsvz-3.20}/PKG-INFO
RENAMED
{tsvz-3.17 → tsvz-3.20}/TSVZ.py
RENAMED
|
@@ -22,7 +22,7 @@ if os.name == 'nt':
|
|
|
22
22
|
elif os.name == 'posix':
|
|
23
23
|
import fcntl
|
|
24
24
|
|
|
25
|
-
version = '3.
|
|
25
|
+
version = '3.20'
|
|
26
26
|
__version__ = version
|
|
27
27
|
author = 'pan@zopyr.us'
|
|
28
28
|
|
|
@@ -30,12 +30,13 @@ DEFAULT_DELIMITER = '\t'
|
|
|
30
30
|
DEFAULTS_INDICATOR_KEY = '#_defaults_#'
|
|
31
31
|
|
|
32
32
|
def get_delimiter(delimiter,file_name = ''):
|
|
33
|
+
global DEFAULT_DELIMITER
|
|
33
34
|
if not delimiter:
|
|
34
35
|
return DEFAULT_DELIMITER
|
|
35
36
|
elif delimiter == ...:
|
|
36
37
|
if not file_name:
|
|
37
38
|
rtn = '\t'
|
|
38
|
-
|
|
39
|
+
elif file_name.endswith('.csv'):
|
|
39
40
|
rtn = ','
|
|
40
41
|
elif file_name.endswith('.nsv'):
|
|
41
42
|
rtn = '\0'
|
|
@@ -58,9 +59,10 @@ def get_delimiter(delimiter,file_name = ''):
|
|
|
58
59
|
|
|
59
60
|
def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
|
|
60
61
|
version = 1.11
|
|
62
|
+
_ = version
|
|
61
63
|
if not data:
|
|
62
64
|
return ''
|
|
63
|
-
if
|
|
65
|
+
if isinstance(data, str):
|
|
64
66
|
data = data.strip('\n').split('\n')
|
|
65
67
|
data = [line.split(delimiter) for line in data]
|
|
66
68
|
elif isinstance(data, dict):
|
|
@@ -72,7 +74,7 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
|
|
|
72
74
|
else:
|
|
73
75
|
# it is a dict of lists
|
|
74
76
|
data = [[key] + list(value) for key, value in data.items()]
|
|
75
|
-
elif
|
|
77
|
+
elif not isinstance(data,list):
|
|
76
78
|
data = list(data)
|
|
77
79
|
# format the list into 2d list of list of strings
|
|
78
80
|
if isinstance(data[0], dict):
|
|
@@ -262,10 +264,10 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
262
264
|
teeLogger.teelog(message,level)
|
|
263
265
|
else:
|
|
264
266
|
print(message,flush=True)
|
|
265
|
-
except Exception
|
|
267
|
+
except Exception:
|
|
266
268
|
print(message,flush=True)
|
|
267
269
|
|
|
268
|
-
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults =
|
|
270
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = None):
|
|
269
271
|
"""
|
|
270
272
|
Process a line of text and update the task dictionary.
|
|
271
273
|
|
|
@@ -282,6 +284,8 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
282
284
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
283
285
|
|
|
284
286
|
"""
|
|
287
|
+
if not defaults:
|
|
288
|
+
defaults = []
|
|
285
289
|
line = line.strip(' ').strip('\x00').rstrip('\r\n')
|
|
286
290
|
# we throw away the lines that start with '#'
|
|
287
291
|
if not line :
|
|
@@ -428,7 +432,7 @@ def _formatHeader(header,verbose = False,teeLogger = None,delimiter = DEFAULT_DE
|
|
|
428
432
|
Returns:
|
|
429
433
|
str: The formatted header string.
|
|
430
434
|
"""
|
|
431
|
-
if
|
|
435
|
+
if not isinstance(header,str):
|
|
432
436
|
try:
|
|
433
437
|
header = delimiter.join(header)
|
|
434
438
|
except:
|
|
@@ -465,7 +469,7 @@ def _lineContainHeader(header,line,verbose = False,teeLogger = None,strict = Fal
|
|
|
465
469
|
if len(header) != len(line) or any([header[i] not in line[i] for i in range(len(header))]):
|
|
466
470
|
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header}",teeLogger=teeLogger)
|
|
467
471
|
if strict:
|
|
468
|
-
raise
|
|
472
|
+
raise ValueError("Data format error! Header mismatch")
|
|
469
473
|
return False
|
|
470
474
|
return True
|
|
471
475
|
|
|
@@ -500,7 +504,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
500
504
|
return True
|
|
501
505
|
elif strict:
|
|
502
506
|
__teePrintOrNot('File not found','error',teeLogger=teeLogger)
|
|
503
|
-
raise
|
|
507
|
+
raise FileNotFoundError("File not found")
|
|
504
508
|
else:
|
|
505
509
|
return False
|
|
506
510
|
return True
|
|
@@ -566,8 +570,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
566
570
|
return taskDic
|
|
567
571
|
with open(fileName, mode ='rb')as file:
|
|
568
572
|
correctColumnNum = -1
|
|
569
|
-
if header.rstrip():
|
|
570
|
-
if verifyHeader:
|
|
573
|
+
if header.rstrip() and verifyHeader:
|
|
571
574
|
line = file.readline().decode(encoding=encoding)
|
|
572
575
|
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
573
576
|
correctColumnNum = len(header.split(delimiter))
|
|
@@ -647,21 +650,24 @@ def appendLinesTabularFile(fileName,linesToAppend,teeLogger = None,header = '',c
|
|
|
647
650
|
return
|
|
648
651
|
formatedLines = []
|
|
649
652
|
for line in linesToAppend:
|
|
650
|
-
if
|
|
653
|
+
if isinstance(linesToAppend,dict):
|
|
654
|
+
key = line
|
|
655
|
+
line = linesToAppend[key]
|
|
656
|
+
if isinstance(line,str):
|
|
651
657
|
line = line.split(delimiter)
|
|
652
|
-
|
|
658
|
+
elif line:
|
|
653
659
|
for i in range(len(line)):
|
|
654
|
-
if
|
|
660
|
+
if not isinstance(line[i],str):
|
|
655
661
|
try:
|
|
656
662
|
line[i] = str(line[i])
|
|
657
663
|
except Exception as e:
|
|
658
664
|
line[i] = str(e)
|
|
665
|
+
if isinstance(linesToAppend,dict) and not line or line[0] != key:
|
|
666
|
+
line = [key]+line
|
|
659
667
|
formatedLines.append(line)
|
|
660
|
-
|
|
661
668
|
with open(fileName, mode ='r+b')as file:
|
|
662
669
|
correctColumnNum = max([len(line) for line in formatedLines])
|
|
663
|
-
if header.rstrip():
|
|
664
|
-
if verifyHeader:
|
|
670
|
+
if header.rstrip() and verifyHeader:
|
|
665
671
|
line = file.readline().decode(encoding=encoding)
|
|
666
672
|
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
667
673
|
correctColumnNum = len(header.split(delimiter))
|
|
@@ -711,7 +717,7 @@ def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,
|
|
|
711
717
|
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
712
718
|
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
|
|
713
719
|
if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
|
|
714
|
-
raise
|
|
720
|
+
raise FileNotFoundError("Something catastrophic happened! File still not found after creation")
|
|
715
721
|
else:
|
|
716
722
|
with open(fileName, mode ='r+',encoding=encoding)as file:
|
|
717
723
|
if header.rstrip() and verifyHeader:
|
|
@@ -749,13 +755,13 @@ class TSVZed(OrderedDict):
|
|
|
749
755
|
self.teeLogger.teelog(message,level)
|
|
750
756
|
else:
|
|
751
757
|
print(message,flush=True)
|
|
752
|
-
except Exception
|
|
758
|
+
except Exception:
|
|
753
759
|
print(message,flush=True)
|
|
754
760
|
|
|
755
761
|
def getResourseUsage(self,return_dict = False):
|
|
756
762
|
return get_resource_usage(return_dict = return_dict)
|
|
757
763
|
|
|
758
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts =
|
|
764
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False):
|
|
759
765
|
super().__init__()
|
|
760
766
|
self.version = version
|
|
761
767
|
self.strict = strict
|
|
@@ -764,7 +770,7 @@ class TSVZed(OrderedDict):
|
|
|
764
770
|
self._fileName = fileName
|
|
765
771
|
self.teeLogger = teeLogger
|
|
766
772
|
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
767
|
-
self.defaults = defualts
|
|
773
|
+
self.defaults = defualts if defualts else []
|
|
768
774
|
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
769
775
|
self.correctColumnNum = -1
|
|
770
776
|
self.createIfNotExist = createIfNotExist
|
|
@@ -830,7 +836,12 @@ class TSVZed(OrderedDict):
|
|
|
830
836
|
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
|
|
831
837
|
if self.verbose:
|
|
832
838
|
self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
|
|
833
|
-
|
|
839
|
+
if self.header and self.verifyHeader:
|
|
840
|
+
self.correctColumnNum = len(self.header.split(self.delimiter))
|
|
841
|
+
elif self:
|
|
842
|
+
self.correctColumnNum = len(self[next(iter(self))])
|
|
843
|
+
else:
|
|
844
|
+
self.correctColumnNum = -1
|
|
834
845
|
if self.verbose:
|
|
835
846
|
self.__teePrintOrNot(f"correctColumnNum: {self.correctColumnNum}")
|
|
836
847
|
#super().update(loadedData)
|
|
@@ -846,10 +857,10 @@ class TSVZed(OrderedDict):
|
|
|
846
857
|
if not key:
|
|
847
858
|
self.__teePrintOrNot('Key cannot be empty','error')
|
|
848
859
|
return
|
|
849
|
-
if
|
|
860
|
+
if isinstance(value,str):
|
|
850
861
|
value = value.split(self.delimiter)
|
|
851
862
|
# sanitize the value
|
|
852
|
-
value = [(str(segment).rstrip() if
|
|
863
|
+
value = [(str(segment).rstrip() if not isinstance(segment,str) else segment.rstrip()) if segment else '' for segment in value]
|
|
853
864
|
# escape the delimiter and newline characters
|
|
854
865
|
value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
|
|
855
866
|
# the first field in value should be the key
|
|
@@ -996,9 +1007,12 @@ class TSVZed(OrderedDict):
|
|
|
996
1007
|
def __enter__(self):
|
|
997
1008
|
return self
|
|
998
1009
|
|
|
999
|
-
def
|
|
1010
|
+
def close(self):
|
|
1000
1011
|
self.stopAppendThread()
|
|
1001
1012
|
return self
|
|
1013
|
+
|
|
1014
|
+
def __exit__(self,exc_type,exc_value,traceback):
|
|
1015
|
+
return self.close()
|
|
1002
1016
|
|
|
1003
1017
|
def __repr__(self):
|
|
1004
1018
|
return f"""TSVZed(
|
|
@@ -1019,16 +1033,11 @@ deSynced:{self.deSynced}
|
|
|
1019
1033
|
memoryOnly:{self.memoryOnly}
|
|
1020
1034
|
{dict(self)})"""
|
|
1021
1035
|
|
|
1022
|
-
def close(self):
|
|
1023
|
-
self.stopAppendThread()
|
|
1024
|
-
return self
|
|
1025
|
-
|
|
1026
1036
|
def __str__(self):
|
|
1027
1037
|
return f"TSVZed({self._fileName},{dict(self)})"
|
|
1028
1038
|
|
|
1029
1039
|
def __del__(self):
|
|
1030
|
-
self.
|
|
1031
|
-
return self
|
|
1040
|
+
return self.close()
|
|
1032
1041
|
|
|
1033
1042
|
def popitem(self, last=True):
|
|
1034
1043
|
key, value = super().popitem(last)
|
|
@@ -1354,14 +1363,14 @@ def __main__():
|
|
|
1354
1363
|
args.header += '\\'
|
|
1355
1364
|
try:
|
|
1356
1365
|
header = args.header.encode().decode('unicode_escape') if args.header else ''
|
|
1357
|
-
except Exception
|
|
1366
|
+
except Exception:
|
|
1358
1367
|
print(f"Failed to decode header: {args.header}")
|
|
1359
1368
|
header = ''
|
|
1360
1369
|
defaults = []
|
|
1361
1370
|
if args.defaults:
|
|
1362
1371
|
try:
|
|
1363
1372
|
defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
|
|
1364
|
-
except Exception
|
|
1373
|
+
except Exception:
|
|
1365
1374
|
print(f"Failed to decode defaults: {args.defaults}")
|
|
1366
1375
|
defaults = []
|
|
1367
1376
|
|
|
@@ -1380,9 +1389,7 @@ def __main__():
|
|
|
1380
1389
|
elif args.operation == 'clear':
|
|
1381
1390
|
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
|
|
1382
1391
|
else:
|
|
1383
|
-
print("Invalid operation")
|
|
1384
|
-
return
|
|
1385
|
-
|
|
1392
|
+
print("Invalid operation")
|
|
1386
1393
|
if __name__ == '__main__':
|
|
1387
1394
|
__main__()
|
|
1388
1395
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tsvz-3.17 → tsvz-3.20}/setup.py
RENAMED
|
File without changes
|