TSVZ 3.2__tar.gz → 3.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: TSVZ
3
- Version: 3.2
3
+ Version: 3.10
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: TSVZ
3
- Version: 3.2
3
+ Version: 3.10
4
4
  Summary: An simple in memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -11,10 +11,11 @@ if os.name == 'nt':
11
11
  elif os.name == 'posix':
12
12
  import fcntl
13
13
 
14
- version = '3.02'
14
+ version = '3.10'
15
15
  author = 'pan@zopyr.us'
16
16
 
17
17
  DEFAULT_DELIMITER = '\t'
18
+ DEFAULTS_INDICATOR_KEY = '#_defaults_#'
18
19
 
19
20
  def get_delimiter(delimiter,file_name = ''):
20
21
  if not delimiter:
@@ -109,7 +110,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
109
110
  except Exception as e:
110
111
  print(message,flush=True)
111
112
 
112
- def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
113
+ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = []):
113
114
  """
114
115
  Process a line of text and update the task dictionary.
115
116
 
@@ -120,6 +121,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
120
121
  verbose (bool, optional): Whether to print verbose output. Defaults to False.
121
122
  teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
122
123
  strict (bool, optional): Whether to strictly enforce the correct number of columns. Defaults to True.
124
+ defaults (list, optional): The default values to use for missing columns. Defaults to [].
123
125
 
124
126
  Returns:
125
127
  tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
@@ -131,36 +133,40 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
131
133
  if verbose:
132
134
  __teePrintOrNot(f"Ignoring empty line: {line}",teeLogger=teeLogger)
133
135
  return correctColumnNum , []
134
- if line.startswith('#'):
136
+ if line.startswith('#') and not line.startswith(DEFAULTS_INDICATOR_KEY):
135
137
  if verbose:
136
138
  __teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
137
139
  return correctColumnNum , []
138
140
  # we are only interested in the lines that have the correct number of columns
139
- lineCache = [segment.strip() for segment in line.split(delimiter)]
141
+ lineCache = [segment.rstrip() for segment in line.split(delimiter)]
140
142
  if not lineCache:
141
143
  return correctColumnNum , []
142
144
  if correctColumnNum == -1:
145
+ if defaults and len(defaults) > 1:
146
+ correctColumnNum = len(defaults)
147
+ else:
148
+ correctColumnNum = len(lineCache)
143
149
  if verbose:
144
150
  __teePrintOrNot(f"detected correctColumnNum: {len(lineCache)}",teeLogger=teeLogger)
145
- correctColumnNum = len(lineCache)
146
151
  if not lineCache[0]:
147
152
  if verbose:
148
153
  __teePrintOrNot(f"Ignoring line with empty key: {line}",teeLogger=teeLogger)
149
154
  return correctColumnNum , []
150
155
  if len(lineCache) == 1 or not any(lineCache[1:]):
151
- if correctColumnNum == 1: taskDic[lineCache[0]] = lineCache
156
+ if correctColumnNum == 1:
157
+ taskDic[lineCache[0]] = lineCache
158
+ elif lineCache[0] == DEFAULTS_INDICATOR_KEY:
159
+ if verbose:
160
+ __teePrintOrNot(f"Empty defaults line found: {line}",teeLogger=teeLogger)
161
+ defaults = []
152
162
  else:
153
163
  if verbose:
154
164
  __teePrintOrNot(f"Key {lineCache[0]} found with empty value, deleting such key's representaion",teeLogger=teeLogger)
155
165
  if lineCache[0] in taskDic:
156
166
  del taskDic[lineCache[0]]
157
167
  return correctColumnNum , []
158
- elif len(lineCache) == correctColumnNum:
159
- taskDic[lineCache[0]] = lineCache
160
- if verbose:
161
- __teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
162
- else:
163
- if strict:
168
+ elif len(lineCache) != correctColumnNum:
169
+ if strict and not any(defaults):
164
170
  if verbose:
165
171
  __teePrintOrNot(f"Ignoring line with {len(lineCache)} columns: {line}",teeLogger=teeLogger)
166
172
  return correctColumnNum , []
@@ -170,12 +176,26 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
170
176
  lineCache += ['']*(correctColumnNum-len(lineCache))
171
177
  elif len(lineCache) > correctColumnNum:
172
178
  lineCache = lineCache[:correctColumnNum]
173
- taskDic[lineCache[0]] = lineCache
174
179
  if verbose:
175
- __teePrintOrNot(f"Key {lineCache[0]} added after correction",teeLogger=teeLogger)
180
+ __teePrintOrNot(f"Correcting {lineCache[0]}",teeLogger=teeLogger)
181
+ # now replace empty values with defaults
182
+ if defaults and len(defaults) > 1:
183
+ for i in range(1,len(lineCache)):
184
+ if not lineCache[i] and i < len(defaults) and defaults[i]:
185
+ lineCache[i] = defaults[i]
186
+ if verbose:
187
+ __teePrintOrNot(f"Replacing empty value at {i} with default: {defaults[i]}",teeLogger=teeLogger)
188
+ if lineCache[0] == DEFAULTS_INDICATOR_KEY:
189
+ if verbose:
190
+ __teePrintOrNot(f"Defaults line found: {line}",teeLogger=teeLogger)
191
+ defaults = lineCache
192
+ return correctColumnNum , []
193
+ taskDic[lineCache[0]] = lineCache
194
+ if verbose:
195
+ __teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
176
196
  return correctColumnNum, lineCache
177
197
 
178
- def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...):
198
+ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = []):
179
199
  """
180
200
  Reads the last valid line from a file.
181
201
 
@@ -187,6 +207,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
187
207
  teeLogger (optional): Logger to use for tee print. Defaults to None.
188
208
  encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
189
209
  strict (bool, optional): Whether to enforce strict processing. Defaults to False.
210
+ delimiter (str, optional): The delimiter used in the file. Defaults to ... (Ellipsis).
211
+ defaults (list, optional): The default values to use for missing columns. Defaults to [].
190
212
 
191
213
  Returns:
192
214
  list: The last valid line data processed by _processLine, or an empty list if none found.
@@ -220,13 +242,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
220
242
  if lines[i].strip(): # Skip empty lines
221
243
  # Process the line
222
244
  correctColumnNum, lineCache = _processLine(
223
- lines[i].decode(encoding=encoding),
224
- taskDic,
225
- correctColumnNum,
245
+ line=lines[i].decode(encoding=encoding),
246
+ taskDic=taskDic,
247
+ correctColumnNum=correctColumnNum,
226
248
  verbose=verbose,
227
249
  teeLogger=teeLogger,
228
250
  strict=strict,
229
- delimiter=delimiter
251
+ delimiter=delimiter,
252
+ defaults=defaults,
230
253
  )
231
254
  # If the line is valid, return it
232
255
  if lineCache and any(lineCache):
@@ -327,7 +350,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
327
350
  return False
328
351
  return True
329
352
 
330
- def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
353
+ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = []):
331
354
  """
332
355
  Compatibility method, calls readTabularFile.
333
356
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
@@ -344,6 +367,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
344
367
  - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
345
368
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
346
369
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
370
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
347
371
 
348
372
  Returns:
349
373
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -352,9 +376,9 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
352
376
  - Exception: If the file is not found or there is a data format error.
353
377
 
354
378
  """
355
- return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter)
379
+ return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
356
380
 
357
- def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...):
381
+ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = []):
358
382
  """
359
383
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
360
384
 
@@ -370,6 +394,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
370
394
  - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
371
395
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
372
396
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
397
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
373
398
 
374
399
  Returns:
375
400
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -394,12 +419,12 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
394
419
  if verbose:
395
420
  __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
396
421
  if lastLineOnly:
397
- lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
422
+ lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter, defaults=defaults)
398
423
  if lineCache:
399
424
  taskDic[lineCache[0]] = lineCache
400
425
  return lineCache
401
426
  for line in file:
402
- correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
427
+ correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
403
428
  return taskDic
404
429
 
405
430
  def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
@@ -446,7 +471,7 @@ def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createI
446
471
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
447
472
  return
448
473
  if type(lineToAppend) == str:
449
- lineToAppend = lineToAppend.strip().split(delimiter)
474
+ lineToAppend = lineToAppend.split(delimiter)
450
475
  else:
451
476
  for i in range(len(lineToAppend)):
452
477
  if type(lineToAppend[i]) != str:
@@ -548,14 +573,16 @@ class TSVZed(OrderedDict):
548
573
  except Exception as e:
549
574
  print(message,flush=True)
550
575
 
551
- def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...):
576
+ def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = [],strict = False):
552
577
  super().__init__()
553
578
  self.version = version
579
+ self.strict = strict
554
580
  self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
555
581
  self.lastUpdateTime = self.externalFileUpdateTime
556
582
  self._fileName = fileName
557
583
  self.teeLogger = teeLogger
558
584
  self.delimiter = get_delimiter(delimiter,file_name=fileName)
585
+ self.defaults = defualts
559
586
  self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
560
587
  self.correctColumnNum = -1
561
588
  self.createIfNotExist = createIfNotExist
@@ -584,6 +611,27 @@ class TSVZed(OrderedDict):
584
611
  self.load()
585
612
  atexit.register(self.stopAppendThread)
586
613
 
614
+ def setDefaults(self,defaults):
615
+ if not defaults:
616
+ defaults = []
617
+ return
618
+ if isinstance(defaults,str):
619
+ defaults = defaults.split(self.delimiter)
620
+ elif not isinstance(defaults,list):
621
+ try:
622
+ defaults = list(defaults)
623
+ except:
624
+ if self.verbose:
625
+ self.__teePrintOrNot('Invalid defaults, setting defaults to empty.','error')
626
+ defaults = []
627
+ return
628
+ if not any(defaults):
629
+ defaults = []
630
+ return
631
+ if defaults[0] != DEFAULTS_INDICATOR_KEY:
632
+ defaults = [DEFAULTS_INDICATOR_KEY]+defaults
633
+ self.defaults = defaults
634
+
587
635
  def load(self):
588
636
  self.reload()
589
637
  if self.rewrite_on_load:
@@ -597,7 +645,7 @@ class TSVZed(OrderedDict):
597
645
  if self.verbose:
598
646
  self.__teePrintOrNot(f"Loading {self._fileName}")
599
647
  super().clear()
600
- readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = False, delimiter = self.delimiter)
648
+ readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
601
649
  if self.verbose:
602
650
  self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
603
651
  self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
@@ -612,30 +660,55 @@ class TSVZed(OrderedDict):
612
660
  return self
613
661
 
614
662
  def __setitem__(self,key,value):
615
- key = str(key).strip()
663
+ key = str(key).rstrip()
616
664
  if not key:
617
665
  self.__teePrintOrNot('Key cannot be empty','error')
618
666
  return
619
667
  if type(value) == str:
620
- value = value.strip().split(self.delimiter)
668
+ value = value.split(self.delimiter)
621
669
  # sanitize the value
622
- value = [(str(segment).strip() if type(segment) != str else segment.strip()) if segment else '' for segment in value]
623
- #value = list(map(lambda segment: str(segment).strip(), value))
670
+ value = [(str(segment).rstrip() if type(segment) != str else segment.rstrip()) if segment else '' for segment in value]
671
+ # escape the delimiter and newline characters
672
+ value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
624
673
  # the first field in value should be the key
625
674
  # add it if it is not there
626
675
  if not value or value[0] != key:
627
676
  value = [key]+value
628
677
  # verify the value has the correct number of columns
629
678
  if self.correctColumnNum != 1 and len(value) == 1:
630
- # this means we want to clear / deelte the key
679
+ # this means we want to clear / delete the key
631
680
  self.__delitem__(key)
632
681
  elif self.correctColumnNum > 0:
633
- assert len(value) == self.correctColumnNum, f"Data format error! Expected {self.correctColumnNum} columns, but got {len(value) } columns"
682
+ if len(value) != self.correctColumnNum:
683
+ if self.strict:
684
+ self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}. Refuse adding key...",'error')
685
+ return
686
+ elif self.verbose:
687
+ self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}, correcting...",'warning')
688
+ if len(value) < self.correctColumnNum:
689
+ value += ['']*(self.correctColumnNum-len(value))
690
+ elif len(value) > self.correctColumnNum:
691
+ value = value[:self.correctColumnNum]
634
692
  else:
635
693
  self.correctColumnNum = len(value)
694
+ if self.defaults and len(self.defaults) > 1:
695
+ for i in range(1,len(value)):
696
+ if not value[i] and i < len(self.defaults) and self.defaults[i]:
697
+ value[i] = self.defaults[i]
698
+ if self.verbose:
699
+ self.__teePrintOrNot(f" Replacing empty value at {i} with default: {self.defaults[i]}")
700
+ if key == DEFAULTS_INDICATOR_KEY:
701
+ self.defaults = value
702
+ if self.verbose:
703
+ self.__teePrintOrNot(f"Defaults set to {value}")
704
+ if not self.memoryOnly:
705
+ self.appendQueue.append(self.delimiter.join(value))
706
+ self.lastUpdateTime = get_time_ns()
707
+ if self.verbose:
708
+ self.__teePrintOrNot(f"Appending Defaults {key} to the appendQueue")
709
+ return
636
710
  if self.verbose:
637
711
  self.__teePrintOrNot(f"Setting {key} to {value}")
638
-
639
712
  if key in self:
640
713
  if self[key] == value:
641
714
  if self.verbose:
@@ -644,9 +717,13 @@ class TSVZed(OrderedDict):
644
717
  self.dirty = True
645
718
  # update the dictionary,
646
719
  super().__setitem__(key,value)
647
- if self.verbose:
648
- self.__teePrintOrNot(f"Key {key} updated")
649
720
  if self.memoryOnly:
721
+ if self.verbose:
722
+ self.__teePrintOrNot(f"Key {key} updated in memory only")
723
+ return
724
+ elif key.startswith('#'):
725
+ if self.verbose:
726
+ self.__teePrintOrNot(f"Key {key} updated in memory only as it starts with #")
650
727
  return
651
728
  if self.verbose:
652
729
  self.__teePrintOrNot(f"Appending {key} to the appendQueue")
@@ -659,16 +736,29 @@ class TSVZed(OrderedDict):
659
736
 
660
737
 
661
738
  def __delitem__(self,key):
662
- key = str(key).strip()
739
+ key = str(key).rstrip()
740
+ if key == DEFAULTS_INDICATOR_KEY:
741
+ self.defaults = []
742
+ if self.verbose:
743
+ self.__teePrintOrNot(f"Defaults cleared")
744
+ if not self.memoryOnly:
745
+ self.__appendEmptyLine(key)
746
+ if self.verbose:
747
+ self.__teePrintOrNot(f"Appending empty default line {key}")
748
+ return
663
749
  # delete the key from the dictionary and update the file
664
750
  if key not in self:
665
751
  if self.verbose:
666
752
  self.__teePrintOrNot(f"Key {key} not found")
667
753
  return
668
754
  super().__delitem__(key)
669
- if self.memoryOnly:
755
+ if self.memoryOnly or key.startswith('#'):
756
+ if self.verbose:
757
+ self.__teePrintOrNot(f"Key {key} deleted in memory")
670
758
  return
671
759
  self.__appendEmptyLine(key)
760
+ if self.verbose:
761
+ self.__teePrintOrNot(f"Appending empty line {key}")
672
762
  self.lastUpdateTime = get_time_ns()
673
763
 
674
764
  def __appendEmptyLine(self,key):
@@ -868,30 +958,35 @@ memoryOnly:{self.memoryOnly}
868
958
  return self
869
959
 
870
960
  def mapToFile(self):
961
+ mec = self.monitor_external_changes
962
+ self.monitor_external_changes = False
871
963
  try:
872
964
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
873
965
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
874
966
  file = self.get_file_obj('r+b')
875
967
  overWrite = False
876
- line = file.readline().decode(self.encoding)
877
- aftPos = file.tell()
878
- if self.header and not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
879
- file.seek(0)
880
- file.write(f'{self.header}\n'.encode(encoding=self.encoding))
881
- # if the header is not the same length as the line, we need to overwrite the file
882
- if aftPos != file.tell():
883
- overWrite = True
884
- if self.verbose:
885
- self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
968
+ if self.header:
969
+ line = file.readline().decode(self.encoding)
970
+ aftPos = file.tell()
971
+ if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
972
+ file.seek(0)
973
+ file.write(f'{self.header}\n'.encode(encoding=self.encoding))
974
+ # if the header is not the same length as the line, we need to overwrite the file
975
+ if aftPos != file.tell():
976
+ overWrite = True
977
+ if self.verbose:
978
+ self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
886
979
  for value in self.values():
887
- strToWrite = self.delimiter.join(value)+'\n'
980
+ if value[0].startswith('#'):
981
+ continue
982
+ strToWrite = self.delimiter.join(value)
888
983
  if overWrite:
889
984
  if self.verbose:
890
985
  self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
891
- file.write(strToWrite.encode(encoding=self.encoding))
986
+ file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
892
987
  continue
893
988
  pos = file.tell()
894
- line = file.readline().decode(encoding=self.encoding)
989
+ line = file.readline()
895
990
  aftPos = file.tell()
896
991
  if not line or pos == aftPos:
897
992
  if self.verbose:
@@ -899,13 +994,14 @@ memoryOnly:{self.memoryOnly}
899
994
  file.write(strToWrite.encode(encoding=self.encoding))
900
995
  overWrite = True
901
996
  continue
997
+ strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
902
998
  if line != strToWrite:
903
999
  if self.verbose:
904
- self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
1000
+ self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
905
1001
  file.seek(pos)
906
1002
  # fill the string with space to write to the correct length
907
1003
  #file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
908
- file.write(strToWrite.encode(encoding=self.encoding).rstrip(b'\n').ljust(len(line)-1)+b'\n')
1004
+ file.write(strToWrite)
909
1005
  if aftPos != file.tell():
910
1006
  overWrite = True
911
1007
  file.truncate()
@@ -921,6 +1017,8 @@ memoryOnly:{self.memoryOnly}
921
1017
  import traceback
922
1018
  self.__teePrintOrNot(traceback.format_exc(),'error')
923
1019
  self.deSynced = True
1020
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
1021
+ self.monitor_external_changes = mec
924
1022
  return self
925
1023
 
926
1024
  def checkExternalChanges(self):
@@ -1062,7 +1160,10 @@ def __main__():
1062
1160
  parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
1063
1161
  parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
1064
1162
  parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
1065
- parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
1163
+ parser.add_argument('--defaults', type=str, help='Default values to fill in the missing columns. seperate using --delimiter. Ex. if -d = comma, --defaults="key,value1,value2..." Note: Please specify the key. But it will not be used as a key need to be unique in data.')
1164
+ strictMode = parser.add_mutually_exclusive_group()
1165
+ strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
1166
+ strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
1066
1167
  parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
1067
1168
  parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
1068
1169
  args = parser.parse_args()
@@ -1074,6 +1175,13 @@ def __main__():
1074
1175
  except Exception as e:
1075
1176
  print(f"Failed to decode header: {args.header}")
1076
1177
  header = ''
1178
+ defaults = []
1179
+ if args.defaults:
1180
+ try:
1181
+ defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
1182
+ except Exception as e:
1183
+ print(f"Failed to decode defaults: {args.defaults}")
1184
+ defaults = []
1077
1185
 
1078
1186
  if args.operation == 'read':
1079
1187
  # check if the file exist
@@ -1081,14 +1189,14 @@ def __main__():
1081
1189
  print(f"File not found: {args.filename}")
1082
1190
  return
1083
1191
  # read the file
1084
- data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= not args.force, delimiter=args.delimiter)
1192
+ data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
1085
1193
  print(pretty_format_table(data.values(),delimiter=args.delimiter))
1086
1194
  elif args.operation == 'append':
1087
- appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
1195
+ appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1088
1196
  elif args.operation == 'delete':
1089
- appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
1197
+ appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1090
1198
  elif args.operation == 'clear':
1091
- clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=not args.force, delimiter=args.delimiter)
1199
+ clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
1092
1200
  else:
1093
1201
  print("Invalid operation")
1094
1202
  return
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes