TSVZ 3.29__tar.gz → 3.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsvz-3.29 → tsvz-3.30}/PKG-INFO +1 -1
- {tsvz-3.29 → tsvz-3.30}/TSVZ.egg-info/PKG-INFO +1 -1
- {tsvz-3.29 → tsvz-3.30}/TSVZ.py +26 -12
- {tsvz-3.29 → tsvz-3.30}/README.md +0 -0
- {tsvz-3.29 → tsvz-3.30}/TSVZ.egg-info/SOURCES.txt +0 -0
- {tsvz-3.29 → tsvz-3.30}/TSVZ.egg-info/dependency_links.txt +0 -0
- {tsvz-3.29 → tsvz-3.30}/TSVZ.egg-info/entry_points.txt +0 -0
- {tsvz-3.29 → tsvz-3.30}/TSVZ.egg-info/top_level.txt +0 -0
- {tsvz-3.29 → tsvz-3.30}/setup.cfg +0 -0
- {tsvz-3.29 → tsvz-3.30}/setup.py +0 -0
{tsvz-3.29 → tsvz-3.30}/PKG-INFO
RENAMED
{tsvz-3.29 → tsvz-3.30}/TSVZ.py
RENAMED
|
@@ -22,10 +22,10 @@ if os.name == 'nt':
|
|
|
22
22
|
elif os.name == 'posix':
|
|
23
23
|
import fcntl
|
|
24
24
|
|
|
25
|
-
version = '3.
|
|
25
|
+
version = '3.30'
|
|
26
26
|
__version__ = version
|
|
27
27
|
author = 'pan@zopyr.us'
|
|
28
|
-
COMMIT_DATE = '2025-
|
|
28
|
+
COMMIT_DATE = '2025-09-15'
|
|
29
29
|
|
|
30
30
|
DEFAULT_DELIMITER = '\t'
|
|
31
31
|
DEFAULTS_INDICATOR_KEY = '#_defaults_#'
|
|
@@ -573,7 +573,9 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
573
573
|
return False
|
|
574
574
|
return True
|
|
575
575
|
|
|
576
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
576
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
577
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...,
|
|
578
|
+
correctColumnNum = -1):
|
|
577
579
|
"""
|
|
578
580
|
Compatibility method, calls readTabularFile.
|
|
579
581
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
@@ -591,6 +593,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
591
593
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
592
594
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
|
|
593
595
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
596
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
594
597
|
|
|
595
598
|
Returns:
|
|
596
599
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -599,9 +602,14 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
599
602
|
- Exception: If the file is not found or there is a data format error.
|
|
600
603
|
|
|
601
604
|
"""
|
|
602
|
-
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
603
|
-
|
|
604
|
-
|
|
605
|
+
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
606
|
+
lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
|
|
607
|
+
encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,
|
|
608
|
+
correctColumnNum = correctColumnNum)
|
|
609
|
+
|
|
610
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
611
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...,
|
|
612
|
+
correctColumnNum = -1):
|
|
605
613
|
"""
|
|
606
614
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
607
615
|
|
|
@@ -618,6 +626,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
618
626
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
619
627
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
620
628
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
629
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
621
630
|
|
|
622
631
|
Returns:
|
|
623
632
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -635,10 +644,9 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
635
644
|
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
636
645
|
return taskDic
|
|
637
646
|
with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
|
|
638
|
-
correctColumnNum = -1
|
|
639
647
|
if header.rstrip() and verifyHeader:
|
|
640
648
|
line = file.readline().decode(encoding=encoding,errors='replace')
|
|
641
|
-
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
649
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict) and correctColumnNum == -1:
|
|
642
650
|
correctColumnNum = len(header.split(delimiter))
|
|
643
651
|
if verbose:
|
|
644
652
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
@@ -852,7 +860,8 @@ def scrubTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, las
|
|
|
852
860
|
"""
|
|
853
861
|
return scrubTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
|
|
854
862
|
|
|
855
|
-
def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
863
|
+
def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
|
|
864
|
+
verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...,correctColumnNum = -1):
|
|
856
865
|
"""
|
|
857
866
|
Scrub a Tabular (CSV / TSV / NSV) file by reading it and writing the contents back into the file.
|
|
858
867
|
If using compressed files, this will recompress the file in whole and possibly increase the compression ratio, reducing the file size.
|
|
@@ -871,6 +880,7 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
|
|
|
871
880
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
|
|
872
881
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
873
882
|
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
883
|
+
- correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
|
|
874
884
|
|
|
875
885
|
Returns:
|
|
876
886
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -879,7 +889,9 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
|
|
|
879
889
|
- Exception: If the file is not found or there is a data format error.
|
|
880
890
|
|
|
881
891
|
"""
|
|
882
|
-
file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
892
|
+
file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
|
|
893
|
+
lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
|
|
894
|
+
encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,correctColumnNum = correctColumnNum)
|
|
883
895
|
if file:
|
|
884
896
|
clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
885
897
|
appendLinesTabularFile(fileName,file,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
@@ -922,7 +934,9 @@ class TSVZed(OrderedDict):
|
|
|
922
934
|
def getResourseUsage(self,return_dict = False):
|
|
923
935
|
return get_resource_usage(return_dict = return_dict)
|
|
924
936
|
|
|
925
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,
|
|
937
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,
|
|
938
|
+
rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,
|
|
939
|
+
verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False,correctColumnNum = -1):
|
|
926
940
|
super().__init__()
|
|
927
941
|
self.version = version
|
|
928
942
|
self.strict = strict
|
|
@@ -933,7 +947,7 @@ class TSVZed(OrderedDict):
|
|
|
933
947
|
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
934
948
|
self.defaults = defualts if defualts else []
|
|
935
949
|
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
936
|
-
self.correctColumnNum =
|
|
950
|
+
self.correctColumnNum = correctColumnNum
|
|
937
951
|
self.createIfNotExist = createIfNotExist
|
|
938
952
|
self.verifyHeader = verifyHeader
|
|
939
953
|
self.rewrite_on_load = rewrite_on_load
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tsvz-3.29 → tsvz-3.30}/setup.py
RENAMED
|
File without changes
|