TSVZ 3.2__py3-none-any.whl → 3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {TSVZ-3.2.dist-info → TSVZ-3.10.dist-info}/METADATA +1 -1
- TSVZ-3.10.dist-info/RECORD +6 -0
- TSVZ.py +166 -58
- TSVZ-3.2.dist-info/RECORD +0 -6
- {TSVZ-3.2.dist-info → TSVZ-3.10.dist-info}/WHEEL +0 -0
- {TSVZ-3.2.dist-info → TSVZ-3.10.dist-info}/entry_points.txt +0 -0
- {TSVZ-3.2.dist-info → TSVZ-3.10.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
TSVZ.py,sha256=-B6hoHwwmvspnIlKIOjJFVmd9UDISpP33xwZyn5Ys8I,59836
|
|
2
|
+
TSVZ-3.10.dist-info/METADATA,sha256=Wm6nwGtoMLKmCjD-bCKdCKvEOmHkxG077P1t2zvIE54,1826
|
|
3
|
+
TSVZ-3.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
4
|
+
TSVZ-3.10.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
|
|
5
|
+
TSVZ-3.10.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
|
|
6
|
+
TSVZ-3.10.dist-info/RECORD,,
|
TSVZ.py
CHANGED
|
@@ -11,10 +11,11 @@ if os.name == 'nt':
|
|
|
11
11
|
elif os.name == 'posix':
|
|
12
12
|
import fcntl
|
|
13
13
|
|
|
14
|
-
version = '3.
|
|
14
|
+
version = '3.10'
|
|
15
15
|
author = 'pan@zopyr.us'
|
|
16
16
|
|
|
17
17
|
DEFAULT_DELIMITER = '\t'
|
|
18
|
+
DEFAULTS_INDICATOR_KEY = '#_defaults_#'
|
|
18
19
|
|
|
19
20
|
def get_delimiter(delimiter,file_name = ''):
|
|
20
21
|
if not delimiter:
|
|
@@ -109,7 +110,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
109
110
|
except Exception as e:
|
|
110
111
|
print(message,flush=True)
|
|
111
112
|
|
|
112
|
-
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
|
|
113
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = []):
|
|
113
114
|
"""
|
|
114
115
|
Process a line of text and update the task dictionary.
|
|
115
116
|
|
|
@@ -120,6 +121,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
120
121
|
verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
121
122
|
teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
|
|
122
123
|
strict (bool, optional): Whether to strictly enforce the correct number of columns. Defaults to True.
|
|
124
|
+
defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
123
125
|
|
|
124
126
|
Returns:
|
|
125
127
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
@@ -131,36 +133,40 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
131
133
|
if verbose:
|
|
132
134
|
__teePrintOrNot(f"Ignoring empty line: {line}",teeLogger=teeLogger)
|
|
133
135
|
return correctColumnNum , []
|
|
134
|
-
if line.startswith('#'):
|
|
136
|
+
if line.startswith('#') and not line.startswith(DEFAULTS_INDICATOR_KEY):
|
|
135
137
|
if verbose:
|
|
136
138
|
__teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
|
|
137
139
|
return correctColumnNum , []
|
|
138
140
|
# we only interested in the lines that have the correct number of columns
|
|
139
|
-
lineCache = [segment.
|
|
141
|
+
lineCache = [segment.rstrip() for segment in line.split(delimiter)]
|
|
140
142
|
if not lineCache:
|
|
141
143
|
return correctColumnNum , []
|
|
142
144
|
if correctColumnNum == -1:
|
|
145
|
+
if defaults and len(defaults) > 1:
|
|
146
|
+
correctColumnNum = len(defaults)
|
|
147
|
+
else:
|
|
148
|
+
correctColumnNum = len(lineCache)
|
|
143
149
|
if verbose:
|
|
144
150
|
__teePrintOrNot(f"detected correctColumnNum: {len(lineCache)}",teeLogger=teeLogger)
|
|
145
|
-
correctColumnNum = len(lineCache)
|
|
146
151
|
if not lineCache[0]:
|
|
147
152
|
if verbose:
|
|
148
153
|
__teePrintOrNot(f"Ignoring line with empty key: {line}",teeLogger=teeLogger)
|
|
149
154
|
return correctColumnNum , []
|
|
150
155
|
if len(lineCache) == 1 or not any(lineCache[1:]):
|
|
151
|
-
if correctColumnNum == 1:
|
|
156
|
+
if correctColumnNum == 1:
|
|
157
|
+
taskDic[lineCache[0]] = lineCache
|
|
158
|
+
elif lineCache[0] == DEFAULTS_INDICATOR_KEY:
|
|
159
|
+
if verbose:
|
|
160
|
+
__teePrintOrNot(f"Empty defaults line found: {line}",teeLogger=teeLogger)
|
|
161
|
+
defaults = []
|
|
152
162
|
else:
|
|
153
163
|
if verbose:
|
|
154
164
|
__teePrintOrNot(f"Key {lineCache[0]} found with empty value, deleting such key's representaion",teeLogger=teeLogger)
|
|
155
165
|
if lineCache[0] in taskDic:
|
|
156
166
|
del taskDic[lineCache[0]]
|
|
157
167
|
return correctColumnNum , []
|
|
158
|
-
elif len(lineCache)
|
|
159
|
-
|
|
160
|
-
if verbose:
|
|
161
|
-
__teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
|
|
162
|
-
else:
|
|
163
|
-
if strict:
|
|
168
|
+
elif len(lineCache) != correctColumnNum:
|
|
169
|
+
if strict and not any(defaults):
|
|
164
170
|
if verbose:
|
|
165
171
|
__teePrintOrNot(f"Ignoring line with {len(lineCache)} columns: {line}",teeLogger=teeLogger)
|
|
166
172
|
return correctColumnNum , []
|
|
@@ -170,12 +176,26 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
170
176
|
lineCache += ['']*(correctColumnNum-len(lineCache))
|
|
171
177
|
elif len(lineCache) > correctColumnNum:
|
|
172
178
|
lineCache = lineCache[:correctColumnNum]
|
|
173
|
-
taskDic[lineCache[0]] = lineCache
|
|
174
179
|
if verbose:
|
|
175
|
-
__teePrintOrNot(f"
|
|
180
|
+
__teePrintOrNot(f"Correcting {lineCache[0]}",teeLogger=teeLogger)
|
|
181
|
+
# now replace empty values with defaults
|
|
182
|
+
if defaults and len(defaults) > 1:
|
|
183
|
+
for i in range(1,len(lineCache)):
|
|
184
|
+
if not lineCache[i] and i < len(defaults) and defaults[i]:
|
|
185
|
+
lineCache[i] = defaults[i]
|
|
186
|
+
if verbose:
|
|
187
|
+
__teePrintOrNot(f"Replacing empty value at {i} with default: {defaults[i]}",teeLogger=teeLogger)
|
|
188
|
+
if lineCache[0] == DEFAULTS_INDICATOR_KEY:
|
|
189
|
+
if verbose:
|
|
190
|
+
__teePrintOrNot(f"Defaults line found: {line}",teeLogger=teeLogger)
|
|
191
|
+
defaults = lineCache
|
|
192
|
+
return correctColumnNum , []
|
|
193
|
+
taskDic[lineCache[0]] = lineCache
|
|
194
|
+
if verbose:
|
|
195
|
+
__teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
|
|
176
196
|
return correctColumnNum, lineCache
|
|
177
197
|
|
|
178
|
-
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter =
|
|
198
|
+
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = []):
|
|
179
199
|
"""
|
|
180
200
|
Reads the last valid line from a file.
|
|
181
201
|
|
|
@@ -187,6 +207,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
187
207
|
teeLogger (optional): Logger to use for tee print. Defaults to None.
|
|
188
208
|
encoding (str, optional): The encoding of the file. Defaults to None.
|
|
189
209
|
strict (bool, optional): Whether to enforce strict processing. Defaults to False.
|
|
210
|
+
delimiter (str, optional): The delimiter used in the file. Defaults to None.
|
|
211
|
+
defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
190
212
|
|
|
191
213
|
Returns:
|
|
192
214
|
list: The last valid line data processed by processLine, or an empty list if none found.
|
|
@@ -220,13 +242,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
220
242
|
if lines[i].strip(): # Skip empty lines
|
|
221
243
|
# Process the line
|
|
222
244
|
correctColumnNum, lineCache = _processLine(
|
|
223
|
-
lines[i].decode(encoding=encoding),
|
|
224
|
-
taskDic,
|
|
225
|
-
correctColumnNum,
|
|
245
|
+
line=lines[i].decode(encoding=encoding),
|
|
246
|
+
taskDic=taskDic,
|
|
247
|
+
correctColumnNum=correctColumnNum,
|
|
226
248
|
verbose=verbose,
|
|
227
249
|
teeLogger=teeLogger,
|
|
228
250
|
strict=strict,
|
|
229
|
-
delimiter=delimiter
|
|
251
|
+
delimiter=delimiter,
|
|
252
|
+
defaults=defaults,
|
|
230
253
|
)
|
|
231
254
|
# If the line is valid, return it
|
|
232
255
|
if lineCache and any(lineCache):
|
|
@@ -327,7 +350,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
327
350
|
return False
|
|
328
351
|
return True
|
|
329
352
|
|
|
330
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
|
|
353
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = []):
|
|
331
354
|
"""
|
|
332
355
|
Compatibility method, calls readTabularFile.
|
|
333
356
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
@@ -344,6 +367,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
344
367
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
345
368
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
346
369
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
|
|
370
|
+
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
347
371
|
|
|
348
372
|
Returns:
|
|
349
373
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -352,9 +376,9 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
352
376
|
- Exception: If the file is not found or there is a data format error.
|
|
353
377
|
|
|
354
378
|
"""
|
|
355
|
-
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
379
|
+
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
|
|
356
380
|
|
|
357
|
-
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter =
|
|
381
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = []):
|
|
358
382
|
"""
|
|
359
383
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
360
384
|
|
|
@@ -370,6 +394,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
370
394
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
371
395
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
372
396
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
397
|
+
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
373
398
|
|
|
374
399
|
Returns:
|
|
375
400
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -394,12 +419,12 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
394
419
|
if verbose:
|
|
395
420
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
396
421
|
if lastLineOnly:
|
|
397
|
-
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
|
|
422
|
+
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter, defaults=defaults)
|
|
398
423
|
if lineCache:
|
|
399
424
|
taskDic[lineCache[0]] = lineCache
|
|
400
425
|
return lineCache
|
|
401
426
|
for line in file:
|
|
402
|
-
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
|
|
427
|
+
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
|
|
403
428
|
return taskDic
|
|
404
429
|
|
|
405
430
|
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
|
|
@@ -446,7 +471,7 @@ def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createI
|
|
|
446
471
|
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
447
472
|
return
|
|
448
473
|
if type(lineToAppend) == str:
|
|
449
|
-
lineToAppend = lineToAppend.
|
|
474
|
+
lineToAppend = lineToAppend.split(delimiter)
|
|
450
475
|
else:
|
|
451
476
|
for i in range(len(lineToAppend)):
|
|
452
477
|
if type(lineToAppend[i]) != str:
|
|
@@ -548,14 +573,16 @@ class TSVZed(OrderedDict):
|
|
|
548
573
|
except Exception as e:
|
|
549
574
|
print(message,flush=True)
|
|
550
575
|
|
|
551
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter =
|
|
576
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = [],strict = False):
|
|
552
577
|
super().__init__()
|
|
553
578
|
self.version = version
|
|
579
|
+
self.strict = strict
|
|
554
580
|
self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
|
|
555
581
|
self.lastUpdateTime = self.externalFileUpdateTime
|
|
556
582
|
self._fileName = fileName
|
|
557
583
|
self.teeLogger = teeLogger
|
|
558
584
|
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
585
|
+
self.defaults = defualts
|
|
559
586
|
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
560
587
|
self.correctColumnNum = -1
|
|
561
588
|
self.createIfNotExist = createIfNotExist
|
|
@@ -584,6 +611,27 @@ class TSVZed(OrderedDict):
|
|
|
584
611
|
self.load()
|
|
585
612
|
atexit.register(self.stopAppendThread)
|
|
586
613
|
|
|
614
|
+
def setDefaults(self,defaults):
|
|
615
|
+
if not defaults:
|
|
616
|
+
defaults = []
|
|
617
|
+
return
|
|
618
|
+
if isinstance(defaults,str):
|
|
619
|
+
defaults = defaults.split(self.delimiter)
|
|
620
|
+
elif not isinstance(defaults,list):
|
|
621
|
+
try:
|
|
622
|
+
defaults = list(defaults)
|
|
623
|
+
except:
|
|
624
|
+
if self.verbose:
|
|
625
|
+
self.__teePrintOrNot('Invalid defaults, setting defaults to empty.','error')
|
|
626
|
+
defaults = []
|
|
627
|
+
return
|
|
628
|
+
if not any(defaults):
|
|
629
|
+
defaults = []
|
|
630
|
+
return
|
|
631
|
+
if defaults[0] != DEFAULTS_INDICATOR_KEY:
|
|
632
|
+
defaults = [DEFAULTS_INDICATOR_KEY]+defaults
|
|
633
|
+
self.defaults = defaults
|
|
634
|
+
|
|
587
635
|
def load(self):
|
|
588
636
|
self.reload()
|
|
589
637
|
if self.rewrite_on_load:
|
|
@@ -597,7 +645,7 @@ class TSVZed(OrderedDict):
|
|
|
597
645
|
if self.verbose:
|
|
598
646
|
self.__teePrintOrNot(f"Loading {self._fileName}")
|
|
599
647
|
super().clear()
|
|
600
|
-
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict =
|
|
648
|
+
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
|
|
601
649
|
if self.verbose:
|
|
602
650
|
self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
|
|
603
651
|
self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
|
|
@@ -612,30 +660,55 @@ class TSVZed(OrderedDict):
|
|
|
612
660
|
return self
|
|
613
661
|
|
|
614
662
|
def __setitem__(self,key,value):
|
|
615
|
-
key = str(key).
|
|
663
|
+
key = str(key).rstrip()
|
|
616
664
|
if not key:
|
|
617
665
|
self.__teePrintOrNot('Key cannot be empty','error')
|
|
618
666
|
return
|
|
619
667
|
if type(value) == str:
|
|
620
|
-
value = value.
|
|
668
|
+
value = value.split(self.delimiter)
|
|
621
669
|
# sanitize the value
|
|
622
|
-
value = [(str(segment).
|
|
623
|
-
#
|
|
670
|
+
value = [(str(segment).rstrip() if type(segment) != str else segment.rstrip()) if segment else '' for segment in value]
|
|
671
|
+
# escape the delimiter and newline characters
|
|
672
|
+
value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
|
|
624
673
|
# the first field in value should be the key
|
|
625
674
|
# add it if it is not there
|
|
626
675
|
if not value or value[0] != key:
|
|
627
676
|
value = [key]+value
|
|
628
677
|
# verify the value has the correct number of columns
|
|
629
678
|
if self.correctColumnNum != 1 and len(value) == 1:
|
|
630
|
-
# this means we want to clear /
|
|
679
|
+
# this means we want to clear / delete the key
|
|
631
680
|
self.__delitem__(key)
|
|
632
681
|
elif self.correctColumnNum > 0:
|
|
633
|
-
|
|
682
|
+
if len(value) != self.correctColumnNum:
|
|
683
|
+
if self.strict:
|
|
684
|
+
self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}. Refuse adding key...",'error')
|
|
685
|
+
return
|
|
686
|
+
elif self.verbose:
|
|
687
|
+
self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}, correcting...",'warning')
|
|
688
|
+
if len(value) < self.correctColumnNum:
|
|
689
|
+
value += ['']*(self.correctColumnNum-len(value))
|
|
690
|
+
elif len(value) > self.correctColumnNum:
|
|
691
|
+
value = value[:self.correctColumnNum]
|
|
634
692
|
else:
|
|
635
693
|
self.correctColumnNum = len(value)
|
|
694
|
+
if self.defaults and len(self.defaults) > 1:
|
|
695
|
+
for i in range(1,len(value)):
|
|
696
|
+
if not value[i] and i < len(self.defaults) and self.defaults[i]:
|
|
697
|
+
value[i] = self.defaults[i]
|
|
698
|
+
if self.verbose:
|
|
699
|
+
self.__teePrintOrNot(f" Replacing empty value at {i} with default: {self.defaults[i]}")
|
|
700
|
+
if key == DEFAULTS_INDICATOR_KEY:
|
|
701
|
+
self.defaults = value
|
|
702
|
+
if self.verbose:
|
|
703
|
+
self.__teePrintOrNot(f"Defaults set to {value}")
|
|
704
|
+
if not self.memoryOnly:
|
|
705
|
+
self.appendQueue.append(self.delimiter.join(value))
|
|
706
|
+
self.lastUpdateTime = get_time_ns()
|
|
707
|
+
if self.verbose:
|
|
708
|
+
self.__teePrintOrNot(f"Appending Defaults {key} to the appendQueue")
|
|
709
|
+
return
|
|
636
710
|
if self.verbose:
|
|
637
711
|
self.__teePrintOrNot(f"Setting {key} to {value}")
|
|
638
|
-
|
|
639
712
|
if key in self:
|
|
640
713
|
if self[key] == value:
|
|
641
714
|
if self.verbose:
|
|
@@ -644,9 +717,13 @@ class TSVZed(OrderedDict):
|
|
|
644
717
|
self.dirty = True
|
|
645
718
|
# update the dictionary,
|
|
646
719
|
super().__setitem__(key,value)
|
|
647
|
-
if self.verbose:
|
|
648
|
-
self.__teePrintOrNot(f"Key {key} updated")
|
|
649
720
|
if self.memoryOnly:
|
|
721
|
+
if self.verbose:
|
|
722
|
+
self.__teePrintOrNot(f"Key {key} updated in memory only")
|
|
723
|
+
return
|
|
724
|
+
elif key.startswith('#'):
|
|
725
|
+
if self.verbose:
|
|
726
|
+
self.__teePrintOrNot(f"Key {key} updated in memory only as it starts with #")
|
|
650
727
|
return
|
|
651
728
|
if self.verbose:
|
|
652
729
|
self.__teePrintOrNot(f"Appending {key} to the appendQueue")
|
|
@@ -659,16 +736,29 @@ class TSVZed(OrderedDict):
|
|
|
659
736
|
|
|
660
737
|
|
|
661
738
|
def __delitem__(self,key):
|
|
662
|
-
key = str(key).
|
|
739
|
+
key = str(key).rstrip()
|
|
740
|
+
if key == DEFAULTS_INDICATOR_KEY:
|
|
741
|
+
self.defaults = []
|
|
742
|
+
if self.verbose:
|
|
743
|
+
self.__teePrintOrNot(f"Defaults cleared")
|
|
744
|
+
if not self.memoryOnly:
|
|
745
|
+
self.__appendEmptyLine(key)
|
|
746
|
+
if self.verbose:
|
|
747
|
+
self.__teePrintOrNot(f"Appending empty default line {key}")
|
|
748
|
+
return
|
|
663
749
|
# delete the key from the dictionary and update the file
|
|
664
750
|
if key not in self:
|
|
665
751
|
if self.verbose:
|
|
666
752
|
self.__teePrintOrNot(f"Key {key} not found")
|
|
667
753
|
return
|
|
668
754
|
super().__delitem__(key)
|
|
669
|
-
if self.memoryOnly:
|
|
755
|
+
if self.memoryOnly or key.startswith('#'):
|
|
756
|
+
if self.verbose:
|
|
757
|
+
self.__teePrintOrNot(f"Key {key} deleted in memory")
|
|
670
758
|
return
|
|
671
759
|
self.__appendEmptyLine(key)
|
|
760
|
+
if self.verbose:
|
|
761
|
+
self.__teePrintOrNot(f"Appending empty line {key}")
|
|
672
762
|
self.lastUpdateTime = get_time_ns()
|
|
673
763
|
|
|
674
764
|
def __appendEmptyLine(self,key):
|
|
@@ -868,30 +958,35 @@ memoryOnly:{self.memoryOnly}
|
|
|
868
958
|
return self
|
|
869
959
|
|
|
870
960
|
def mapToFile(self):
|
|
961
|
+
mec = self.monitor_external_changes
|
|
962
|
+
self.monitor_external_changes = False
|
|
871
963
|
try:
|
|
872
964
|
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
873
965
|
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
874
966
|
file = self.get_file_obj('r+b')
|
|
875
967
|
overWrite = False
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
968
|
+
if self.header:
|
|
969
|
+
line = file.readline().decode(self.encoding)
|
|
970
|
+
aftPos = file.tell()
|
|
971
|
+
if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
|
|
972
|
+
file.seek(0)
|
|
973
|
+
file.write(f'{self.header}\n'.encode(encoding=self.encoding))
|
|
974
|
+
# if the header is not the same length as the line, we need to overwrite the file
|
|
975
|
+
if aftPos != file.tell():
|
|
976
|
+
overWrite = True
|
|
977
|
+
if self.verbose:
|
|
978
|
+
self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
|
|
886
979
|
for value in self.values():
|
|
887
|
-
|
|
980
|
+
if value[0].startswith('#'):
|
|
981
|
+
continue
|
|
982
|
+
strToWrite = self.delimiter.join(value)
|
|
888
983
|
if overWrite:
|
|
889
984
|
if self.verbose:
|
|
890
985
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
891
|
-
file.write(strToWrite.encode(encoding=self.encoding))
|
|
986
|
+
file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
|
|
892
987
|
continue
|
|
893
988
|
pos = file.tell()
|
|
894
|
-
line = file.readline()
|
|
989
|
+
line = file.readline()
|
|
895
990
|
aftPos = file.tell()
|
|
896
991
|
if not line or pos == aftPos:
|
|
897
992
|
if self.verbose:
|
|
@@ -899,13 +994,14 @@ memoryOnly:{self.memoryOnly}
|
|
|
899
994
|
file.write(strToWrite.encode(encoding=self.encoding))
|
|
900
995
|
overWrite = True
|
|
901
996
|
continue
|
|
997
|
+
strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
|
|
902
998
|
if line != strToWrite:
|
|
903
999
|
if self.verbose:
|
|
904
|
-
self.__teePrintOrNot(f"
|
|
1000
|
+
self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
|
|
905
1001
|
file.seek(pos)
|
|
906
1002
|
# fill the string with space to write to the correct length
|
|
907
1003
|
#file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
908
|
-
file.write(strToWrite
|
|
1004
|
+
file.write(strToWrite)
|
|
909
1005
|
if aftPos != file.tell():
|
|
910
1006
|
overWrite = True
|
|
911
1007
|
file.truncate()
|
|
@@ -921,6 +1017,8 @@ memoryOnly:{self.memoryOnly}
|
|
|
921
1017
|
import traceback
|
|
922
1018
|
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
923
1019
|
self.deSynced = True
|
|
1020
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
1021
|
+
self.monitor_external_changes = mec
|
|
924
1022
|
return self
|
|
925
1023
|
|
|
926
1024
|
def checkExternalChanges(self):
|
|
@@ -1062,7 +1160,10 @@ def __main__():
|
|
|
1062
1160
|
parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
|
|
1063
1161
|
parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
|
|
1064
1162
|
parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
|
|
1065
|
-
parser.add_argument('
|
|
1163
|
+
parser.add_argument('--defaults', type=str, help='Default values to fill in the missing columns. seperate using --delimiter. Ex. if -d = comma, --defaults="key,value1,value2..." Note: Please specify the key. But it will not be used as a key need to be unique in data.')
|
|
1164
|
+
strictMode = parser.add_mutually_exclusive_group()
|
|
1165
|
+
strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
|
|
1166
|
+
strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
|
|
1066
1167
|
parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
|
|
1067
1168
|
parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
|
|
1068
1169
|
args = parser.parse_args()
|
|
@@ -1074,6 +1175,13 @@ def __main__():
|
|
|
1074
1175
|
except Exception as e:
|
|
1075
1176
|
print(f"Failed to decode header: {args.header}")
|
|
1076
1177
|
header = ''
|
|
1178
|
+
defaults = []
|
|
1179
|
+
if args.defaults:
|
|
1180
|
+
try:
|
|
1181
|
+
defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
|
|
1182
|
+
except Exception as e:
|
|
1183
|
+
print(f"Failed to decode defaults: {args.defaults}")
|
|
1184
|
+
defaults = []
|
|
1077
1185
|
|
|
1078
1186
|
if args.operation == 'read':
|
|
1079
1187
|
# check if the file exist
|
|
@@ -1081,14 +1189,14 @@ def __main__():
|
|
|
1081
1189
|
print(f"File not found: {args.filename}")
|
|
1082
1190
|
return
|
|
1083
1191
|
# read the file
|
|
1084
|
-
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict=
|
|
1192
|
+
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
|
|
1085
1193
|
print(pretty_format_table(data.values(),delimiter=args.delimiter))
|
|
1086
1194
|
elif args.operation == 'append':
|
|
1087
|
-
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict=
|
|
1195
|
+
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
|
|
1088
1196
|
elif args.operation == 'delete':
|
|
1089
|
-
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict=
|
|
1197
|
+
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
|
|
1090
1198
|
elif args.operation == 'clear':
|
|
1091
|
-
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=
|
|
1199
|
+
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
|
|
1092
1200
|
else:
|
|
1093
1201
|
print("Invalid operation")
|
|
1094
1202
|
return
|
TSVZ-3.2.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
TSVZ.py,sha256=l2y-iLRf0xFusH1aoAciHSBkix0P8k643A1ChU25dPA,54026
|
|
2
|
-
TSVZ-3.2.dist-info/METADATA,sha256=p381Xig6aZj75lDC_D3Loa1F4cZa8PJSfALC9UrfrbA,1825
|
|
3
|
-
TSVZ-3.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
4
|
-
TSVZ-3.2.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
|
|
5
|
-
TSVZ-3.2.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
|
|
6
|
-
TSVZ-3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|