TSVZ 3.29__py3-none-any.whl → 3.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
TSVZ.py CHANGED
@@ -22,10 +22,10 @@ if os.name == 'nt':
22
22
  elif os.name == 'posix':
23
23
  import fcntl
24
24
 
25
- version = '3.29'
25
+ version = '3.30'
26
26
  __version__ = version
27
27
  author = 'pan@zopyr.us'
28
- COMMIT_DATE = '2025-08-11'
28
+ COMMIT_DATE = '2025-09-15'
29
29
 
30
30
  DEFAULT_DELIMITER = '\t'
31
31
  DEFAULTS_INDICATOR_KEY = '#_defaults_#'
@@ -573,7 +573,9 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
573
573
  return False
574
574
  return True
575
575
 
576
- def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...):
576
+ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
577
+ verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...,
578
+ correctColumnNum = -1):
577
579
  """
578
580
  Compatibility method, calls readTabularFile.
579
581
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
@@ -591,6 +593,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
591
593
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
592
594
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
593
595
  - defaults (list, optional): The default values to use for missing columns. Defaults to [].
596
+ - correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
594
597
 
595
598
  Returns:
596
599
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -599,9 +602,14 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
599
602
  - Exception: If the file is not found or there is a data format error.
600
603
 
601
604
  """
602
- return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
603
-
604
- def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...):
605
+ return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
606
+ lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
607
+ encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,
608
+ correctColumnNum = correctColumnNum)
609
+
610
+ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
611
+ verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...,
612
+ correctColumnNum = -1):
605
613
  """
606
614
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
607
615
 
@@ -618,6 +626,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
618
626
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
619
627
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
620
628
  - defaults (list, optional): The default values to use for missing columns. Defaults to [].
629
+ - correctColumnNum (int, optional): The expected number of columns in the file. If -1, it is taken from the number of fields in the header when the header is verified; otherwise it will be determined from the first valid line. Defaults to -1.
621
630
 
622
631
  Returns:
623
632
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -635,10 +644,9 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
635
644
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
636
645
  return taskDic
637
646
  with openFileAsCompressed(fileName, mode ='rb',encoding=encoding,teeLogger=teeLogger)as file:
638
- correctColumnNum = -1
639
647
  if header.rstrip() and verifyHeader:
640
648
  line = file.readline().decode(encoding=encoding,errors='replace')
641
- if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
649
+ if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict) and correctColumnNum == -1:
642
650
  correctColumnNum = len(header.split(delimiter))
643
651
  if verbose:
644
652
  __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
@@ -852,7 +860,8 @@ def scrubTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, las
852
860
  """
853
861
  return scrubTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
854
862
 
855
- def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...):
863
+ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,
864
+ verbose = False,taskDic = None,encoding = 'utf8',strict = False,delimiter = ...,defaults = ...,correctColumnNum = -1):
856
865
  """
857
866
  Scrub a Tabular (CSV / TSV / NSV) file by reading it and writing the contents back into the file.
858
867
  If using compressed files, this will recompress the whole file and possibly increase the compression ratio, reducing the file size.
@@ -871,6 +880,7 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
871
880
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to False.
872
881
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
873
882
  - defaults (list, optional): The default values to use for missing columns. Defaults to [].
883
+ - correctColumnNum (int, optional): The expected number of columns in the file. If -1, it will be determined from the first valid line. Defaults to -1.
874
884
 
875
885
  Returns:
876
886
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -879,7 +889,9 @@ def scrubTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fa
879
889
  - Exception: If the file is not found or there is a data format error.
880
890
 
881
891
  """
882
- file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
892
+ file = readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,
893
+ lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,
894
+ encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults,correctColumnNum = correctColumnNum)
883
895
  if file:
884
896
  clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
885
897
  appendLinesTabularFile(fileName,file,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
@@ -922,7 +934,9 @@ class TSVZed(OrderedDict):
922
934
  def getResourseUsage(self,return_dict = False):
923
935
  return get_resource_usage(return_dict = return_dict)
924
936
 
925
- def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False):
937
+ def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,
938
+ rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,
939
+ verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False,correctColumnNum = -1):
926
940
  super().__init__()
927
941
  self.version = version
928
942
  self.strict = strict
@@ -933,7 +947,7 @@ class TSVZed(OrderedDict):
933
947
  self.delimiter = get_delimiter(delimiter,file_name=fileName)
934
948
  self.defaults = defualts if defualts else []
935
949
  self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
936
- self.correctColumnNum = -1
950
+ self.correctColumnNum = correctColumnNum
937
951
  self.createIfNotExist = createIfNotExist
938
952
  self.verifyHeader = verifyHeader
939
953
  self.rewrite_on_load = rewrite_on_load
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TSVZ
3
- Version: 3.29
3
+ Version: 3.30
4
4
  Summary: A simple in-memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -0,0 +1,6 @@
1
+ TSVZ.py,sha256=Pga6KuYLChU7LKmE9yqZRVC10D8qivq_nt0EqO0BBjk,79376
2
+ tsvz-3.30.dist-info/METADATA,sha256=OMf1LxZs4tP04KeUnYjRV42D-9Gy7xJHTXsZGgS7QI8,1826
3
+ tsvz-3.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
4
+ tsvz-3.30.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
+ tsvz-3.30.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
+ tsvz-3.30.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- TSVZ.py,sha256=mULdGJvv_LcO4bGGRWjg-esutV3EahLywvRa6oRkynQ,78467
2
- tsvz-3.29.dist-info/METADATA,sha256=kS1cnrQ2wyqj5qVhnVlbCSjdWLeJkhbPqLwFnDX-QpY,1826
3
- tsvz-3.29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
4
- tsvz-3.29.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
- tsvz-3.29.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
- tsvz-3.29.dist-info/RECORD,,
File without changes