TSVZ 3.28__tar.gz → 3.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsvz-3.28 → tsvz-3.30}/PKG-INFO +1 -1
- {tsvz-3.28 → tsvz-3.30}/README.md +0 -0
- {tsvz-3.28 → tsvz-3.30}/TSVZ.egg-info/PKG-INFO +1 -1
- {tsvz-3.28 → tsvz-3.30}/TSVZ.egg-info/SOURCES.txt +0 -0
- {tsvz-3.28 → tsvz-3.30}/TSVZ.egg-info/dependency_links.txt +0 -0
- {tsvz-3.28 → tsvz-3.30}/TSVZ.egg-info/entry_points.txt +0 -0
- {tsvz-3.28 → tsvz-3.30}/TSVZ.egg-info/top_level.txt +0 -0
- {tsvz-3.28 → tsvz-3.30}/TSVZ.py +52 -15
- {tsvz-3.28 → tsvz-3.30}/setup.py +0 -0
- {tsvz-3.28 → tsvz-3.30}/setup.cfg +0 -0
{tsvz-3.28 → tsvz-3.30}/PKG-INFO
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tsvz-3.28 → tsvz-3.30}/TSVZ.py
RENAMED
|
@@ -22,10 +22,10 @@ if os.name == 'nt':
|
|
|
22
22
|
elif os.name == 'posix':
|
|
23
23
|
import fcntl
|
|
24
24
|
|
|
25
|
-
version = '3.
|
|
25
|
+
version = '3.30'
|
|
26
26
|
__version__ = version
|
|
27
27
|
author = 'pan@zopyr.us'
|
|
28
|
-
COMMIT_DATE = '2025-
|
|
28
|
+
COMMIT_DATE = '2025-09-15'
|
|
29
29
|
|
|
30
30
|
DEFAULT_DELIMITER = '\t'
|
|
31
31
|
DEFAULTS_INDICATOR_KEY = '#_defaults_#'
|
|
@@ -573,7 +573,9 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
573
573
|
return False
|
|
574
574
|
return True
|
|
575
575
|
|
|
576
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
576
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
577
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...,
|
|
578
|
+
correctColumnNum = -1):
|
|
577
579
|
"""
|
|
578
580
|
Compatibility method, calls readTabularFile.
|
|
579
581
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
@@ -591,6 +593,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
591
593
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
592
594
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
|
|
593
595
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
596
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
594
597
|
|
|
595
598
|
Returns:
|
|
596
599
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -599,9 +602,14 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
599
602
|
- Exception: If the file is not found or there is a data format error.
|
|
600
603
|
|
|
601
604
|
"""
|
|
602
|
-
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
603
|
-
|
|
604
|
-
|
|
605
|
+
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
606
|
+
lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
|
|
607
|
+
encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,
|
|
608
|
+
correctColumnNum = correctColumnNum)
|
|
609
|
+
|
|
610
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
611
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...,
|
|
612
|
+
correctColumnNum = -1):
|
|
605
613
|
"""
|
|
606
614
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
607
615
|
|
|
@@ -618,6 +626,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
618
626
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
619
627
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
620
628
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
629
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
621
630
|
|
|
622
631
|
Returns:
|
|
623
632
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -635,10 +644,9 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
635
644
|
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
636
645
|
return taskDic
|
|
637
646
|
with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
|
|
638
|
-
correctColumnNum = -1
|
|
639
647
|
if header.rstrip() and verifyHeader:
|
|
640
648
|
line = file.readline().decode(encoding=encoding,errors='replace')
|
|
641
|
-
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
649
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict) and correctColumnNum == -1:
|
|
642
650
|
correctColumnNum = len(header.split(delimiter))
|
|
643
651
|
if verbose:
|
|
644
652
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
@@ -852,7 +860,8 @@ def scrubTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, las
|
|
|
852
860
|
"""
|
|
853
861
|
return scrubTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
|
|
854
862
|
|
|
855
|
-
def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
863
|
+
def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
864
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...,correctColumnNum = -1):
|
|
856
865
|
"""
|
|
857
866
|
Scrub a Tabular (CSV / TSV / NSV) file by reading it and writing the contents back into the file.
|
|
858
867
|
If using compressed files, this will recompress the whole file and possibly increase the compression ratio, reducing the file size.
|
|
@@ -871,6 +880,7 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
|
|
|
871
880
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
|
|
872
881
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
873
882
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
883
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
874
884
|
|
|
875
885
|
Returns:
|
|
876
886
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -879,12 +889,37 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
|
|
|
879
889
|
- Exception: If the file is not found or there is a data format error.
|
|
880
890
|
|
|
881
891
|
"""
|
|
882
|
-
file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
892
|
+
file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
893
|
+
lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
|
|
894
|
+
encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,correctColumnNum = correctColumnNum)
|
|
883
895
|
if file:
|
|
884
896
|
clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
885
897
|
appendLinesTabularFile(fileName,file,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
886
898
|
return file
|
|
887
899
|
|
|
900
|
+
def getListView(tsvzDic,header = None,delimiter = DEFAULT_DELIMITER):
    """
    Return the rows of a TSVZ dictionary as a list, optionally led by a header row.

    Parameters:
    - tsvzDic (dict): Mapping whose values are the rows to list. May be empty or None.
    - header (str | list | iterable, optional): The header row. A string is split on
      `delimiter`; any other non-list value is converted with list(). A falsy value
      means "no header". Defaults to None.
    - delimiter (str, optional): The delimiter used to split a string header.
      Defaults to DEFAULT_DELIMITER.

    Returns:
    - list: [] when there are neither rows nor a header; [header] when there are no rows;
      the row values when there is no header or the first row already equals the header;
      otherwise [header] followed by the row values.
    """
    # Normalize a truthy header into a list so it can be compared against the rows.
    if header:
        if isinstance(header,str):
            header = header.split(delimiter)
        elif not isinstance(header,list):
            try:
                header = list(header)
            except Exception:
                # Best effort: a header that cannot be turned into a list is treated as absent.
                # Unlike a bare except, this lets KeyboardInterrupt / SystemExit propagate.
                header = []

    if not tsvzDic:
        return [header] if header else []

    rows = list(tsvzDic.values())
    if not header:
        return rows
    # Avoid emitting the header twice when the first stored row already is the header.
    if rows[0] and rows[0] == header:
        return rows
    return [header] + rows
|
|
922
|
+
|
|
888
923
|
# Create a TSV class that functions like an ordered dictionary but updates the file when modified
|
|
889
924
|
class TSVZed(OrderedDict):
|
|
890
925
|
def __teePrintOrNot(self,message,level = 'info'):
|
|
@@ -899,7 +934,9 @@ class TSVZed(OrderedDict):
|
|
|
899
934
|
def getResourseUsage(self,return_dict = False):
|
|
900
935
|
return get_resource_usage(return_dict = return_dict)
|
|
901
936
|
|
|
902
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,
|
|
937
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,
|
|
938
|
+
rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,
|
|
939
|
+
verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False,correctColumnNum = -1):
|
|
903
940
|
super().__init__()
|
|
904
941
|
self.version = version
|
|
905
942
|
self.strict = strict
|
|
@@ -910,7 +947,7 @@ class TSVZed(OrderedDict):
|
|
|
910
947
|
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
911
948
|
self.defaults = defualts if defualts else []
|
|
912
949
|
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
913
|
-
self.correctColumnNum =
|
|
950
|
+
self.correctColumnNum = correctColumnNum
|
|
914
951
|
self.createIfNotExist = createIfNotExist
|
|
915
952
|
self.verifyHeader = verifyHeader
|
|
916
953
|
self.rewrite_on_load = rewrite_on_load
|
|
@@ -1105,6 +1142,9 @@ class TSVZed(OrderedDict):
|
|
|
1105
1142
|
self.__teePrintOrNot(f"Appending {emptyLine} to the appendQueue")
|
|
1106
1143
|
self.appendQueue.append(emptyLine)
|
|
1107
1144
|
return self
|
|
1145
|
+
|
|
1146
|
+
def getListView(self):
    """
    Return the rows held by this object as a list.

    Delegates to the module-level getListView(), passing this object as the row
    mapping together with the instance's own header and delimiter.
    """
    return getListView(self,delimiter=self.delimiter,header=self.header)
|
|
1108
1148
|
|
|
1109
1149
|
def clear(self):
|
|
1110
1150
|
# clear the dictionary and update the file
|
|
@@ -1539,6 +1579,3 @@ def __main__():
|
|
|
1539
1579
|
print("Invalid operation")
|
|
1540
1580
|
if __name__ == '__main__':
|
|
1541
1581
|
__main__()
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
{tsvz-3.28 → tsvz-3.30}/setup.py
RENAMED
|
File without changes
|
|
File without changes
|