TSVZ 3.17__tar.gz → 3.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: TSVZ
3
- Version: 3.17
3
+ Version: 3.20
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: TSVZ
3
- Version: 3.17
3
+ Version: 3.20
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -22,7 +22,7 @@ if os.name == 'nt':
22
22
  elif os.name == 'posix':
23
23
  import fcntl
24
24
 
25
- version = '3.17'
25
+ version = '3.20'
26
26
  __version__ = version
27
27
  author = 'pan@zopyr.us'
28
28
 
@@ -30,12 +30,13 @@ DEFAULT_DELIMITER = '\t'
30
30
  DEFAULTS_INDICATOR_KEY = '#_defaults_#'
31
31
 
32
32
  def get_delimiter(delimiter,file_name = ''):
33
+ global DEFAULT_DELIMITER
33
34
  if not delimiter:
34
35
  return DEFAULT_DELIMITER
35
36
  elif delimiter == ...:
36
37
  if not file_name:
37
38
  rtn = '\t'
38
- if file_name.endswith('.csv'):
39
+ elif file_name.endswith('.csv'):
39
40
  rtn = ','
40
41
  elif file_name.endswith('.nsv'):
41
42
  rtn = '\0'
@@ -58,9 +59,10 @@ def get_delimiter(delimiter,file_name = ''):
58
59
 
59
60
  def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
60
61
  version = 1.11
62
+ _ = version
61
63
  if not data:
62
64
  return ''
63
- if type(data) == str:
65
+ if isinstance(data, str):
64
66
  data = data.strip('\n').split('\n')
65
67
  data = [line.split(delimiter) for line in data]
66
68
  elif isinstance(data, dict):
@@ -72,7 +74,7 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
72
74
  else:
73
75
  # it is a dict of lists
74
76
  data = [[key] + list(value) for key, value in data.items()]
75
- elif type(data) != list:
77
+ elif not isinstance(data,list):
76
78
  data = list(data)
77
79
  # format the list into 2d list of list of strings
78
80
  if isinstance(data[0], dict):
@@ -262,10 +264,10 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
262
264
  teeLogger.teelog(message,level)
263
265
  else:
264
266
  print(message,flush=True)
265
- except Exception as e:
267
+ except Exception:
266
268
  print(message,flush=True)
267
269
 
268
- def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = []):
270
+ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = None):
269
271
  """
270
272
  Process a line of text and update the task dictionary.
271
273
 
@@ -282,6 +284,8 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
282
284
  tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
283
285
 
284
286
  """
287
+ if not defaults:
288
+ defaults = []
285
289
  line = line.strip(' ').strip('\x00').rstrip('\r\n')
286
290
  # we throw away the lines that start with '#'
287
291
  if not line :
@@ -428,7 +432,7 @@ def _formatHeader(header,verbose = False,teeLogger = None,delimiter = DEFAULT_DE
428
432
  Returns:
429
433
  str: The formatted header string.
430
434
  """
431
- if type(header) != str:
435
+ if not isinstance(header,str):
432
436
  try:
433
437
  header = delimiter.join(header)
434
438
  except:
@@ -465,7 +469,7 @@ def _lineContainHeader(header,line,verbose = False,teeLogger = None,strict = Fal
465
469
  if len(header) != len(line) or any([header[i] not in line[i] for i in range(len(header))]):
466
470
  __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header}",teeLogger=teeLogger)
467
471
  if strict:
468
- raise Exception("Data format error! Header mismatch")
472
+ raise ValueError("Data format error! Header mismatch")
469
473
  return False
470
474
  return True
471
475
 
@@ -500,7 +504,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
500
504
  return True
501
505
  elif strict:
502
506
  __teePrintOrNot('File not found','error',teeLogger=teeLogger)
503
- raise Exception("File not found")
507
+ raise FileNotFoundError("File not found")
504
508
  else:
505
509
  return False
506
510
  return True
@@ -566,8 +570,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
566
570
  return taskDic
567
571
  with open(fileName, mode ='rb')as file:
568
572
  correctColumnNum = -1
569
- if header.rstrip():
570
- if verifyHeader:
573
+ if header.rstrip() and verifyHeader:
571
574
  line = file.readline().decode(encoding=encoding)
572
575
  if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
573
576
  correctColumnNum = len(header.split(delimiter))
@@ -647,21 +650,24 @@ def appendLinesTabularFile(fileName,linesToAppend,teeLogger = None,header = '',c
647
650
  return
648
651
  formatedLines = []
649
652
  for line in linesToAppend:
650
- if type(line) == str:
653
+ if isinstance(linesToAppend,dict):
654
+ key = line
655
+ line = linesToAppend[key]
656
+ if isinstance(line,str):
651
657
  line = line.split(delimiter)
652
- else:
658
+ elif line:
653
659
  for i in range(len(line)):
654
- if type(line[i]) != str:
660
+ if not isinstance(line[i],str):
655
661
  try:
656
662
  line[i] = str(line[i])
657
663
  except Exception as e:
658
664
  line[i] = str(e)
665
+ if isinstance(linesToAppend,dict) and not line or line[0] != key:
666
+ line = [key]+line
659
667
  formatedLines.append(line)
660
-
661
668
  with open(fileName, mode ='r+b')as file:
662
669
  correctColumnNum = max([len(line) for line in formatedLines])
663
- if header.rstrip():
664
- if verifyHeader:
670
+ if header.rstrip() and verifyHeader:
665
671
  line = file.readline().decode(encoding=encoding)
666
672
  if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
667
673
  correctColumnNum = len(header.split(delimiter))
@@ -711,7 +717,7 @@ def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,
711
717
  delimiter = get_delimiter(delimiter,file_name=fileName)
712
718
  header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
713
719
  if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
714
- raise Exception("Something catastrophic happened! File still not found after creation")
720
+ raise FileNotFoundError("Something catastrophic happened! File still not found after creation")
715
721
  else:
716
722
  with open(fileName, mode ='r+',encoding=encoding)as file:
717
723
  if header.rstrip() and verifyHeader:
@@ -749,13 +755,13 @@ class TSVZed(OrderedDict):
749
755
  self.teeLogger.teelog(message,level)
750
756
  else:
751
757
  print(message,flush=True)
752
- except Exception as e:
758
+ except Exception:
753
759
  print(message,flush=True)
754
760
 
755
761
  def getResourseUsage(self,return_dict = False):
756
762
  return get_resource_usage(return_dict = return_dict)
757
763
 
758
- def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = [],strict = False):
764
+ def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = None,strict = False):
759
765
  super().__init__()
760
766
  self.version = version
761
767
  self.strict = strict
@@ -764,7 +770,7 @@ class TSVZed(OrderedDict):
764
770
  self._fileName = fileName
765
771
  self.teeLogger = teeLogger
766
772
  self.delimiter = get_delimiter(delimiter,file_name=fileName)
767
- self.defaults = defualts
773
+ self.defaults = defualts if defualts else []
768
774
  self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
769
775
  self.correctColumnNum = -1
770
776
  self.createIfNotExist = createIfNotExist
@@ -830,7 +836,12 @@ class TSVZed(OrderedDict):
830
836
  readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
831
837
  if self.verbose:
832
838
  self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
833
- self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
839
+ if self.header and self.verifyHeader:
840
+ self.correctColumnNum = len(self.header.split(self.delimiter))
841
+ elif self:
842
+ self.correctColumnNum = len(self[next(iter(self))])
843
+ else:
844
+ self.correctColumnNum = -1
834
845
  if self.verbose:
835
846
  self.__teePrintOrNot(f"correctColumnNum: {self.correctColumnNum}")
836
847
  #super().update(loadedData)
@@ -846,10 +857,10 @@ class TSVZed(OrderedDict):
846
857
  if not key:
847
858
  self.__teePrintOrNot('Key cannot be empty','error')
848
859
  return
849
- if type(value) == str:
860
+ if isinstance(value,str):
850
861
  value = value.split(self.delimiter)
851
862
  # sanitize the value
852
- value = [(str(segment).rstrip() if type(segment) != str else segment.rstrip()) if segment else '' for segment in value]
863
+ value = [(str(segment).rstrip() if not isinstance(segment,str) else segment.rstrip()) if segment else '' for segment in value]
853
864
  # escape the delimiter and newline characters
854
865
  value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
855
866
  # the first field in value should be the key
@@ -996,9 +1007,12 @@ class TSVZed(OrderedDict):
996
1007
  def __enter__(self):
997
1008
  return self
998
1009
 
999
- def __exit__(self,exc_type,exc_value,traceback):
1010
+ def close(self):
1000
1011
  self.stopAppendThread()
1001
1012
  return self
1013
+
1014
+ def __exit__(self,exc_type,exc_value,traceback):
1015
+ return self.close()
1002
1016
 
1003
1017
  def __repr__(self):
1004
1018
  return f"""TSVZed(
@@ -1019,16 +1033,11 @@ deSynced:{self.deSynced}
1019
1033
  memoryOnly:{self.memoryOnly}
1020
1034
  {dict(self)})"""
1021
1035
 
1022
- def close(self):
1023
- self.stopAppendThread()
1024
- return self
1025
-
1026
1036
  def __str__(self):
1027
1037
  return f"TSVZed({self._fileName},{dict(self)})"
1028
1038
 
1029
1039
  def __del__(self):
1030
- self.stopAppendThread()
1031
- return self
1040
+ return self.close()
1032
1041
 
1033
1042
  def popitem(self, last=True):
1034
1043
  key, value = super().popitem(last)
@@ -1354,14 +1363,14 @@ def __main__():
1354
1363
  args.header += '\\'
1355
1364
  try:
1356
1365
  header = args.header.encode().decode('unicode_escape') if args.header else ''
1357
- except Exception as e:
1366
+ except Exception:
1358
1367
  print(f"Failed to decode header: {args.header}")
1359
1368
  header = ''
1360
1369
  defaults = []
1361
1370
  if args.defaults:
1362
1371
  try:
1363
1372
  defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
1364
- except Exception as e:
1373
+ except Exception:
1365
1374
  print(f"Failed to decode defaults: {args.defaults}")
1366
1375
  defaults = []
1367
1376
 
@@ -1380,9 +1389,7 @@ def __main__():
1380
1389
  elif args.operation == 'clear':
1381
1390
  clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
1382
1391
  else:
1383
- print("Invalid operation")
1384
- return
1385
-
1392
+ print("Invalid operation")
1386
1393
  if __name__ == '__main__':
1387
1394
  __main__()
1388
1395
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes