TSVZ 2.70__py3-none-any.whl → 3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {TSVZ-2.70.dist-info → TSVZ-3.2.dist-info}/METADATA +57 -56
- TSVZ-3.2.dist-info/RECORD +6 -0
- {TSVZ-2.70.dist-info → TSVZ-3.2.dist-info}/entry_points.txt +0 -0
- {TSVZ-2.70.dist-info → TSVZ-3.2.dist-info}/top_level.txt +0 -0
- TSVZ.py +277 -148
- TSVZ-2.70.dist-info/LICENSE +0 -674
- TSVZ-2.70.dist-info/RECORD +0 -7
- {TSVZ-2.70.dist-info → TSVZ-3.2.dist-info}/WHEEL +0 -0
TSVZ.py
CHANGED
|
@@ -4,59 +4,90 @@ from collections import OrderedDict , deque
|
|
|
4
4
|
import time
|
|
5
5
|
import atexit
|
|
6
6
|
import threading
|
|
7
|
+
import re
|
|
7
8
|
|
|
8
9
|
if os.name == 'nt':
|
|
9
10
|
import msvcrt
|
|
10
11
|
elif os.name == 'posix':
|
|
11
12
|
import fcntl
|
|
12
13
|
|
|
13
|
-
version = '
|
|
14
|
+
version = '3.02'
|
|
14
15
|
author = 'pan@zopyr.us'
|
|
15
16
|
|
|
17
|
+
DEFAULT_DELIMITER = '\t'
|
|
16
18
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
19
|
+
def get_delimiter(delimiter,file_name = ''):
    """
    Resolve a delimiter specification into the actual delimiter string.

    Parameters:
    - delimiter (str, Ellipsis or None): The delimiter specification.
        - A falsy value (None, '') returns the current DEFAULT_DELIMITER.
        - Ellipsis (...) picks the delimiter from the extension of file_name:
          '.csv' -> ',', '.nsv' -> '\\0', '.psv' -> '|', anything else -> '\\t'.
        - 'comma', 'tab', 'pipe' and 'null' map to ',', '\\t', '|' and '\\0'.
        - Any other string is used as is, after its backslash escape sequences
          are decoded (so a literal backslash followed by 't' becomes a tab).
    - file_name (str, optional): The file name used for extension based detection. Defaults to ''.

    Returns:
    - str: The resolved delimiter.

    Side effects:
    - Every explicitly resolved delimiter is stored in the module level
      DEFAULT_DELIMITER, so later calls with a falsy delimiter return it.
    """
    # Without this declaration the assignment at the end of the function makes
    # DEFAULT_DELIMITER a local name, and the early return below raises
    # UnboundLocalError instead of returning the module default.
    global DEFAULT_DELIMITER
    if not delimiter:
        return DEFAULT_DELIMITER
    extension_delimiters = {'.csv': ',', '.nsv': '\0', '.psv': '|'}
    named_delimiters = {'comma': ',', 'tab': '\t', 'pipe': '|', 'null': '\0'}
    if delimiter is ...:
        # auto detect from the file extension, falling back to tab
        rtn = '\t'
        if file_name:
            for extension, candidate in extension_delimiters.items():
                if file_name.endswith(extension):
                    rtn = candidate
                    break
    elif delimiter in named_delimiters:
        rtn = named_delimiters[delimiter]
    else:
        # allow escaped forms typed on a command line, e.g. '\\t' or '\\x1f'
        rtn = delimiter.encode().decode('unicode_escape')
    DEFAULT_DELIMITER = rtn
    return rtn
|
|
45
|
+
|
|
46
|
+
def pretty_format_table(data, delimiter = DEFAULT_DELIMITER):
    """
    Render tabular data as an aligned, human readable text table.

    The first row is used as the header and is followed by a divider line.
    Any later row whose cells are all empty is rendered as a divider line too.

    Parameters:
    - data (str, dict, list or iterable): The table to format.
        - str: Lines separated by '\\n', columns separated by delimiter.
        - dict of dicts: A header row of 'key' plus the keys of the first inner
          dict is generated; each entry becomes a row of its key and values.
        - dict of iterables: Each entry becomes a row of its key and values.
        - list / iterable of dicts: The keys of the first dict form the header.
        - list / iterable of rows: Used as is.
    - delimiter (str, optional): The column separator, only used when data is a string. Defaults to DEFAULT_DELIMITER.

    Returns:
    - str: The formatted table ending with a newline, or '' if data is empty.
    """
    if not data:
        return ''
    if isinstance(data, str):
        data = [line.split(delimiter) for line in data.strip('\n').split('\n')]
    elif isinstance(data, dict):
        first_value = next(iter(data.values()))
        if isinstance(first_value, dict):
            # flatten the 2D dict to a list of lists
            rows = [['key'] + list(first_value.keys())]
            rows.extend([key] + list(value.values()) for key, value in data.items())
            data = rows
        else:
            # it is a dict of lists
            data = [[key] + list(value) for key, value in data.items()]
    elif not isinstance(data, list):
        data = list(data)
        # an exhausted / empty iterator is truthy but yields no rows
        if not data:
            return ''
    # format the list into 2d list of list of strings
    if isinstance(data[0], dict):
        rows = [list(data[0].keys())]
        rows.extend(list(item.values()) for item in data)
        data = rows
    data = [[str(item) for item in row] for row in data]
    num_cols = len(data[0])
    # The header decides the column count: pad short rows with empty cells so
    # they do not raise IndexError, and drop cells beyond the header width.
    data = [row[:num_cols] + [''] * (num_cols - len(row)) for row in data]
    # ANSI escape sequences take no space on screen, so measure cells without them
    ansi_escape = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')

    def visible_len(cell):
        return len(ansi_escape.sub('', cell))

    # Calculate the maximum visible width of each column
    col_widths = [max(visible_len(row[c]) for row in data) for c in range(num_cols)]
    divider = '-+-'.join('-' * width for width in col_widths)

    def render(row):
        # pad by visible width so cells containing escape codes stay aligned
        return ' | '.join(cell + ' ' * (width - visible_len(cell)) for cell, width in zip(row, col_widths))

    outTable = [render(data[0]), divider]
    for row in data[1:]:
        # if the row is empty, print a divider
        outTable.append(render(row) if any(row) else divider)
    return '\n'.join(outTable) + '\n'
|
|
60
91
|
|
|
61
92
|
def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
62
93
|
"""
|
|
@@ -78,7 +109,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
78
109
|
except Exception as e:
|
|
79
110
|
print(message,flush=True)
|
|
80
111
|
|
|
81
|
-
def
|
|
112
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
|
|
82
113
|
"""
|
|
83
114
|
Process a line of text and update the task dictionary.
|
|
84
115
|
|
|
@@ -94,7 +125,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
94
125
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
95
126
|
|
|
96
127
|
"""
|
|
97
|
-
line = line.
|
|
128
|
+
line = line.strip(' ').strip('\x00').rstrip('\r\n')
|
|
98
129
|
# we throw away the lines that start with '#'
|
|
99
130
|
if not line :
|
|
100
131
|
if verbose:
|
|
@@ -105,7 +136,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
105
136
|
__teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
|
|
106
137
|
return correctColumnNum , []
|
|
107
138
|
# we are only interested in the lines that have the correct number of columns
|
|
108
|
-
lineCache = [segment.strip() for segment in line.split(
|
|
139
|
+
lineCache = [segment.strip() for segment in line.split(delimiter)]
|
|
109
140
|
if not lineCache:
|
|
110
141
|
return correctColumnNum , []
|
|
111
142
|
if correctColumnNum == -1:
|
|
@@ -144,7 +175,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
144
175
|
__teePrintOrNot(f"Key {lineCache[0]} added after correction",teeLogger=teeLogger)
|
|
145
176
|
return correctColumnNum, lineCache
|
|
146
177
|
|
|
147
|
-
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False):
|
|
178
|
+
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...):
|
|
148
179
|
"""
|
|
149
180
|
Reads the last valid line from a file.
|
|
150
181
|
|
|
@@ -154,6 +185,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
154
185
|
correctColumnNum (int): A column number to pass to the _processLine function.
|
|
155
186
|
verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
156
187
|
teeLogger (optional): Logger to use for tee print. Defaults to None.
|
|
188
|
+
encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
157
189
|
strict (bool, optional): Whether to enforce strict processing. Defaults to False.
|
|
158
190
|
|
|
159
191
|
Returns:
|
|
@@ -161,6 +193,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
161
193
|
"""
|
|
162
194
|
chunk_size = 1024 # Read in chunks of 1024 bytes
|
|
163
195
|
last_valid_line = []
|
|
196
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
164
197
|
if verbose:
|
|
165
198
|
__teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
|
|
166
199
|
with open(fileName, 'rb') as file:
|
|
@@ -186,13 +219,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
186
219
|
for i in range(len(lines) - 1, -1, -1):
|
|
187
220
|
if lines[i].strip(): # Skip empty lines
|
|
188
221
|
# Process the line
|
|
189
|
-
correctColumnNum, lineCache =
|
|
190
|
-
lines[i],
|
|
222
|
+
correctColumnNum, lineCache = _processLine(
|
|
223
|
+
lines[i].decode(encoding=encoding),
|
|
191
224
|
taskDic,
|
|
192
225
|
correctColumnNum,
|
|
193
226
|
verbose=verbose,
|
|
194
227
|
teeLogger=teeLogger,
|
|
195
|
-
strict=strict
|
|
228
|
+
strict=strict,
|
|
229
|
+
delimiter=delimiter
|
|
196
230
|
)
|
|
197
231
|
# If the line is valid, return it
|
|
198
232
|
if lineCache and any(lineCache):
|
|
@@ -204,7 +238,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
204
238
|
# Return empty list if no valid line found
|
|
205
239
|
return last_valid_line
|
|
206
240
|
|
|
207
|
-
def
|
|
241
|
+
def _formatHeader(header,verbose = False,teeLogger = None,delimiter = DEFAULT_DELIMITER):
|
|
208
242
|
"""
|
|
209
243
|
Format the header string.
|
|
210
244
|
|
|
@@ -218,12 +252,12 @@ def formatHeader(header,verbose = False,teeLogger = None):
|
|
|
218
252
|
"""
|
|
219
253
|
if type(header) != str:
|
|
220
254
|
try:
|
|
221
|
-
header =
|
|
255
|
+
header = delimiter.join(header)
|
|
222
256
|
except:
|
|
223
257
|
if verbose:
|
|
224
258
|
__teePrintOrNot('Invalid header, setting header to empty.','error',teeLogger=teeLogger)
|
|
225
259
|
header = ''
|
|
226
|
-
header = header.
|
|
260
|
+
header = delimiter.join([segment.rstrip() for segment in header.split(delimiter)])
|
|
227
261
|
# if header:
|
|
228
262
|
# if not header.endswith('\n'):
|
|
229
263
|
# header += '\n'
|
|
@@ -231,7 +265,7 @@ def formatHeader(header,verbose = False,teeLogger = None):
|
|
|
231
265
|
# header = ''
|
|
232
266
|
return header
|
|
233
267
|
|
|
234
|
-
def
|
|
268
|
+
def _lineContainHeader(header,line,verbose = False,teeLogger = None,strict = False,delimiter = DEFAULT_DELIMITER):
|
|
235
269
|
"""
|
|
236
270
|
Verify if a line contains the header.
|
|
237
271
|
|
|
@@ -245,26 +279,24 @@ def lineContainHeader(header,line,verbose = False,teeLogger = None,strict = Fals
|
|
|
245
279
|
Returns:
|
|
246
280
|
bool: True if the header matches the line, False otherwise.
|
|
247
281
|
"""
|
|
248
|
-
|
|
249
|
-
|
|
282
|
+
header = [segment.rstrip() for segment in header.split(delimiter)]
|
|
283
|
+
line = [segment.rstrip() for segment in line.split(delimiter)]
|
|
250
284
|
if verbose:
|
|
251
|
-
__teePrintOrNot(f"Header: \n{
|
|
252
|
-
__teePrintOrNot(f"First line: \n{
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
if len(headerList) != len(lineList) or any([headerList[i] not in lineList[i] for i in range(len(headerList))]):
|
|
256
|
-
__teePrintOrNot(f"Header mismatch: \n{escapedLine} \n!= \n{escapedHeader}",teeLogger=teeLogger)
|
|
285
|
+
__teePrintOrNot(f"Header: \n{header}",teeLogger=teeLogger)
|
|
286
|
+
__teePrintOrNot(f"First line: \n{line}",teeLogger=teeLogger)
|
|
287
|
+
if len(header) != len(line) or any([header[i] not in line[i] for i in range(len(header))]):
|
|
288
|
+
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header}",teeLogger=teeLogger)
|
|
257
289
|
if strict:
|
|
258
290
|
raise Exception("Data format error! Header mismatch")
|
|
259
291
|
return False
|
|
260
292
|
return True
|
|
261
293
|
|
|
262
|
-
def
|
|
294
|
+
def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,header = '',encoding = 'utf8',strict = True,delimiter = DEFAULT_DELIMITER):
|
|
263
295
|
"""
|
|
264
|
-
Verify the existence of
|
|
296
|
+
Verify the existence of the tabular file.
|
|
265
297
|
|
|
266
298
|
Parameters:
|
|
267
|
-
- fileName (str): The path of the
|
|
299
|
+
- fileName (str): The path of the tabular file.
|
|
268
300
|
- createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to True.
|
|
269
301
|
- teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
|
|
270
302
|
- header (str, optional): The header line to verify against. Defaults to ''.
|
|
@@ -274,8 +306,14 @@ def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header
|
|
|
274
306
|
Returns:
|
|
275
307
|
bool: True if the file exists, False otherwise.
|
|
276
308
|
"""
|
|
277
|
-
if not fileName.endswith('.tsv'):
|
|
309
|
+
if delimiter and delimiter == '\t' and not fileName.endswith('.tsv'):
|
|
278
310
|
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
|
|
311
|
+
elif delimiter and delimiter == ',' and not fileName.endswith('.csv'):
|
|
312
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .csv','warning',teeLogger=teeLogger)
|
|
313
|
+
elif delimiter and delimiter == '\0' and not fileName.endswith('.nsv'):
|
|
314
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .nsv','warning',teeLogger=teeLogger)
|
|
315
|
+
elif delimiter and delimiter == '|' and not fileName.endswith('.psv'):
|
|
316
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .psv','warning',teeLogger=teeLogger)
|
|
279
317
|
if not os.path.isfile(fileName):
|
|
280
318
|
if createIfNotExist:
|
|
281
319
|
with open(fileName, mode ='w',encoding=encoding)as file:
|
|
@@ -289,14 +327,41 @@ def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header
|
|
|
289
327
|
return False
|
|
290
328
|
return True
|
|
291
329
|
|
|
292
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
|
|
330
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
    """
    Backward compatible entry point that delegates to readTabularFile.

    Every argument is forwarded unchanged; the only difference from calling
    readTabularFile directly is that delimiter defaults to a tab here.

    Parameters:
    - fileName (str): The path to the Tabular file.
    - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
    - header (str or list, optional): The expected header, either as one string or as a list of column names. Defaults to ''.
    - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
    - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
    - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
    - verbose (bool, optional): Whether to print verbose output. Defaults to False.
    - taskDic (dict, optional): The dictionary to store the data in. Defaults to None.
    - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
    - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
    - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.

    Returns:
    - The value returned by readTabularFile for the same arguments.

    Raises:
    - Exception: Whatever readTabularFile raises for the same arguments.
    """
    forwarded = dict(
        teeLogger = teeLogger,
        header = header,
        createIfNotExist = createIfNotExist,
        lastLineOnly = lastLineOnly,
        verifyHeader = verifyHeader,
        verbose = verbose,
        taskDic = taskDic,
        encoding = encoding,
        strict = strict,
        delimiter = delimiter,
    )
    return readTabularFile(fileName, **forwarded)
|
|
356
|
+
|
|
357
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...):
|
|
358
|
+
"""
|
|
359
|
+
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
295
360
|
|
|
296
361
|
Parameters:
|
|
297
|
-
- fileName (str): The path to the
|
|
362
|
+
- fileName (str): The path to the Tabular file.
|
|
298
363
|
- teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
|
|
299
|
-
- header (str or list, optional): The header of the
|
|
364
|
+
- header (str or list, optional): The header of the Tabular file. If a string, it should be a delimiter-separated list of column names. If a list, it should contain the column names. Defaults to ''.
|
|
300
365
|
- createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
|
|
301
366
|
- lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
|
|
302
367
|
- verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
|
|
@@ -304,9 +369,10 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
304
369
|
- taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
|
|
305
370
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
306
371
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
372
|
+
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to detection from the file extension: ',' for .csv, '\0' for .nsv, '|' for .psv, '\t' otherwise.
|
|
307
373
|
|
|
308
374
|
Returns:
|
|
309
|
-
- OrderedDict: The dictionary containing the data from the
|
|
375
|
+
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
310
376
|
|
|
311
377
|
Raises:
|
|
312
378
|
- Exception: If the file is not found or there is a data format error.
|
|
@@ -314,33 +380,35 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
314
380
|
"""
|
|
315
381
|
if taskDic is None:
|
|
316
382
|
taskDic = {}
|
|
317
|
-
|
|
318
|
-
|
|
383
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
384
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
|
|
385
|
+
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
319
386
|
return taskDic
|
|
320
387
|
with open(fileName, mode ='rb')as file:
|
|
321
388
|
correctColumnNum = -1
|
|
322
|
-
if header.
|
|
389
|
+
if header.rstrip():
|
|
323
390
|
if verifyHeader:
|
|
324
|
-
line = file.readline().decode()
|
|
325
|
-
if
|
|
326
|
-
correctColumnNum = len(header.
|
|
391
|
+
line = file.readline().decode(encoding=encoding)
|
|
392
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
393
|
+
correctColumnNum = len(header.split(delimiter))
|
|
327
394
|
if verbose:
|
|
328
395
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
329
396
|
if lastLineOnly:
|
|
330
|
-
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
|
|
397
|
+
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
|
|
331
398
|
if lineCache:
|
|
332
399
|
taskDic[lineCache[0]] = lineCache
|
|
333
400
|
return lineCache
|
|
334
401
|
for line in file:
|
|
335
|
-
correctColumnNum, lineCache =
|
|
402
|
+
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
|
|
336
403
|
return taskDic
|
|
337
404
|
|
|
338
|
-
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True):
|
|
405
|
+
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
|
|
339
406
|
"""
|
|
340
|
-
|
|
407
|
+
Compatibility method, calls appendTabularFile.
|
|
408
|
+
Append a line of data to a Tabular file.
|
|
341
409
|
Parameters:
|
|
342
|
-
- fileName (str): The path of the
|
|
343
|
-
- lineToAppend (str or list): The line of data to append. If it is a string, it will be split by
|
|
410
|
+
- fileName (str): The path of the Tabular file.
|
|
411
|
+
- lineToAppend (str or list): The line of data to append. If it is a string, it will be split by delimiter to form a list.
|
|
344
412
|
- teeLogger (optional): A logger object for logging messages.
|
|
345
413
|
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
346
414
|
- createIfNotExist (bool, optional): If True, the file will be created if it does not exist. If False and the file does not exist, an exception will be raised.
|
|
@@ -348,15 +416,37 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
348
416
|
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
349
417
|
- encoding (str, optional): The encoding of the file.
|
|
350
418
|
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
419
|
+
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
|
|
351
420
|
Raises:
|
|
352
421
|
- Exception: If the file does not exist and createIfNotExist is False.
|
|
353
422
|
- Exception: If the existing header does not match the provided header.
|
|
354
423
|
"""
|
|
355
|
-
header =
|
|
356
|
-
|
|
424
|
+
return appendTabularFile(fileName,lineToAppend,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding, strict = strict, delimiter = delimiter)
|
|
425
|
+
|
|
426
|
+
def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = ...):
|
|
427
|
+
"""
|
|
428
|
+
Append a line of data to a Tabular file.
|
|
429
|
+
Parameters:
|
|
430
|
+
- fileName (str): The path of the Tabular file.
|
|
431
|
+
- lineToAppend (str or list): The line of data to append. If it is a string, it will be split by delimiter to form a list.
|
|
432
|
+
- teeLogger (optional): A logger object for logging messages.
|
|
433
|
+
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
434
|
+
- createIfNotExist (bool, optional): If True, the file will be created if it does not exist. If False and the file does not exist, an exception will be raised.
|
|
435
|
+
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
436
|
+
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
437
|
+
- encoding (str, optional): The encoding of the file.
|
|
438
|
+
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
439
|
+
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to detection from the file extension: ',' for .csv, '\0' for .nsv, '|' for .psv, '\t' otherwise.
|
|
440
|
+
Raises:
|
|
441
|
+
- Exception: If the file does not exist and createIfNotExist is False.
|
|
442
|
+
- Exception: If the existing header does not match the provided header.
|
|
443
|
+
"""
|
|
444
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
445
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
|
|
446
|
+
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
357
447
|
return
|
|
358
448
|
if type(lineToAppend) == str:
|
|
359
|
-
lineToAppend = lineToAppend.strip().split(
|
|
449
|
+
lineToAppend = lineToAppend.strip().split(delimiter)
|
|
360
450
|
else:
|
|
361
451
|
for i in range(len(lineToAppend)):
|
|
362
452
|
if type(lineToAppend[i]) != str:
|
|
@@ -367,11 +457,11 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
367
457
|
|
|
368
458
|
with open(fileName, mode ='r+b')as file:
|
|
369
459
|
correctColumnNum = len(lineToAppend)
|
|
370
|
-
if header.
|
|
460
|
+
if header.rstrip():
|
|
371
461
|
if verifyHeader:
|
|
372
|
-
line = file.readline().decode()
|
|
373
|
-
if
|
|
374
|
-
correctColumnNum = len(header.
|
|
462
|
+
line = file.readline().decode(encoding=encoding)
|
|
463
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
464
|
+
correctColumnNum = len(header.split(delimiter))
|
|
375
465
|
if verbose:
|
|
376
466
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
377
467
|
# truncate / fill the lineToAppend to the correct number of columns
|
|
@@ -383,15 +473,16 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
383
473
|
file.seek(-1, os.SEEK_END)
|
|
384
474
|
if file.read(1) != b'\n':
|
|
385
475
|
file.write(b'\n')
|
|
386
|
-
file.write(
|
|
476
|
+
file.write(get_delimiter(delimiter).join(lineToAppend).encode(encoding=encoding) + b'\n')
|
|
387
477
|
if verbose:
|
|
388
478
|
__teePrintOrNot(f"Appended {lineToAppend} to {fileName}",teeLogger=teeLogger)
|
|
389
479
|
|
|
390
|
-
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False):
|
|
480
|
+
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False,delimiter = '\t'):
|
|
391
481
|
"""
|
|
392
|
-
|
|
482
|
+
Compatibility method, calls clearTabularFile.
|
|
483
|
+
Clear the contents of a Tabular file. Will create if not exist.
|
|
393
484
|
Parameters:
|
|
394
|
-
- fileName (str): The path of the
|
|
485
|
+
- fileName (str): The path of the Tabular file.
|
|
395
486
|
- teeLogger (optional): A logger object for logging messages.
|
|
396
487
|
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
397
488
|
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
@@ -399,14 +490,29 @@ def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose
|
|
|
399
490
|
- encoding (str, optional): The encoding of the file.
|
|
400
491
|
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
401
492
|
"""
|
|
402
|
-
header =
|
|
403
|
-
|
|
493
|
+
return clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
494
|
+
|
|
495
|
+
def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False,delimiter = ...):
|
|
496
|
+
"""
|
|
497
|
+
Clear the contents of a Tabular file. Will create if not exist.
|
|
498
|
+
Parameters:
|
|
499
|
+
- fileName (str): The path of the Tabular file.
|
|
500
|
+
- teeLogger (optional): A logger object for logging messages.
|
|
501
|
+
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
502
|
+
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
503
|
+
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
504
|
+
- encoding (str, optional): The encoding of the file.
|
|
505
|
+
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
506
|
+
"""
|
|
507
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
508
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
|
|
509
|
+
if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
|
|
404
510
|
raise Exception("Something catastrophic happened! File still not found after creation")
|
|
405
511
|
else:
|
|
406
512
|
with open(fileName, mode ='r+',encoding=encoding)as file:
|
|
407
|
-
if header.
|
|
408
|
-
line = file.readline()
|
|
409
|
-
if not
|
|
513
|
+
if header.rstrip() and verifyHeader:
|
|
514
|
+
line = file.readline()
|
|
515
|
+
if not _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
410
516
|
__teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
|
|
411
517
|
file.truncate()
|
|
412
518
|
else:
|
|
@@ -442,14 +548,15 @@ class TSVZed(OrderedDict):
|
|
|
442
548
|
except Exception as e:
|
|
443
549
|
print(message,flush=True)
|
|
444
550
|
|
|
445
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding =
|
|
551
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...):
|
|
446
552
|
super().__init__()
|
|
447
553
|
self.version = version
|
|
448
554
|
self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
|
|
449
555
|
self.lastUpdateTime = self.externalFileUpdateTime
|
|
450
556
|
self._fileName = fileName
|
|
451
557
|
self.teeLogger = teeLogger
|
|
452
|
-
self.
|
|
558
|
+
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
559
|
+
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
453
560
|
self.correctColumnNum = -1
|
|
454
561
|
self.createIfNotExist = createIfNotExist
|
|
455
562
|
self.verifyHeader = verifyHeader
|
|
@@ -490,10 +597,10 @@ class TSVZed(OrderedDict):
|
|
|
490
597
|
if self.verbose:
|
|
491
598
|
self.__teePrintOrNot(f"Loading {self._fileName}")
|
|
492
599
|
super().clear()
|
|
493
|
-
|
|
600
|
+
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = False, delimiter = self.delimiter)
|
|
494
601
|
if self.verbose:
|
|
495
602
|
self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
|
|
496
|
-
self.correctColumnNum = len(self.header.split(
|
|
603
|
+
self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
|
|
497
604
|
if self.verbose:
|
|
498
605
|
self.__teePrintOrNot(f"correctColumnNum: {self.correctColumnNum}")
|
|
499
606
|
#super().update(loadedData)
|
|
@@ -510,7 +617,7 @@ class TSVZed(OrderedDict):
|
|
|
510
617
|
self.__teePrintOrNot('Key cannot be empty','error')
|
|
511
618
|
return
|
|
512
619
|
if type(value) == str:
|
|
513
|
-
value = value.strip().split(
|
|
620
|
+
value = value.strip().split(self.delimiter)
|
|
514
621
|
# sanitize the value
|
|
515
622
|
value = [(str(segment).strip() if type(segment) != str else segment.strip()) if segment else '' for segment in value]
|
|
516
623
|
#value = list(map(lambda segment: str(segment).strip(), value))
|
|
@@ -543,7 +650,7 @@ class TSVZed(OrderedDict):
|
|
|
543
650
|
return
|
|
544
651
|
if self.verbose:
|
|
545
652
|
self.__teePrintOrNot(f"Appending {key} to the appendQueue")
|
|
546
|
-
self.appendQueue.append(
|
|
653
|
+
self.appendQueue.append(self.delimiter.join(value))
|
|
547
654
|
self.lastUpdateTime = get_time_ns()
|
|
548
655
|
# if not self.appendThread.is_alive():
|
|
549
656
|
# self.commitAppendToFile()
|
|
@@ -567,10 +674,10 @@ class TSVZed(OrderedDict):
|
|
|
567
674
|
def __appendEmptyLine(self,key):
|
|
568
675
|
self.dirty = True
|
|
569
676
|
if self.correctColumnNum > 0:
|
|
570
|
-
emptyLine = key+
|
|
677
|
+
emptyLine = key+self.delimiter*(self.correctColumnNum-1)
|
|
571
678
|
elif len(self[key]) > 1:
|
|
572
679
|
self.correctColumnNum = len(self[key])
|
|
573
|
-
emptyLine = key+
|
|
680
|
+
emptyLine = key+self.delimiter*(self.correctColumnNum-1)
|
|
574
681
|
else:
|
|
575
682
|
emptyLine = key
|
|
576
683
|
if self.verbose:
|
|
@@ -745,7 +852,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
745
852
|
if self.header:
|
|
746
853
|
file.write(self.header+'\n')
|
|
747
854
|
for key in self:
|
|
748
|
-
file.write(
|
|
855
|
+
file.write(self.delimiter.join(self[key])+'\n')
|
|
749
856
|
self.release_file_obj(file)
|
|
750
857
|
if self.verbose:
|
|
751
858
|
self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
|
|
@@ -764,32 +871,32 @@ memoryOnly:{self.memoryOnly}
|
|
|
764
871
|
try:
|
|
765
872
|
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
766
873
|
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
767
|
-
file = self.get_file_obj('r+')
|
|
874
|
+
file = self.get_file_obj('r+b')
|
|
768
875
|
overWrite = False
|
|
769
|
-
line = file.readline()
|
|
876
|
+
line = file.readline().decode(self.encoding)
|
|
770
877
|
aftPos = file.tell()
|
|
771
|
-
if self.header and not
|
|
878
|
+
if self.header and not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
|
|
772
879
|
file.seek(0)
|
|
773
|
-
file.write(self.header
|
|
880
|
+
file.write(f'{self.header}\n'.encode(encoding=self.encoding))
|
|
774
881
|
# if the header is not the same length as the line, we need to overwrite the file
|
|
775
882
|
if aftPos != file.tell():
|
|
776
883
|
overWrite = True
|
|
777
884
|
if self.verbose:
|
|
778
885
|
self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
|
|
779
886
|
for value in self.values():
|
|
780
|
-
strToWrite =
|
|
887
|
+
strToWrite = self.delimiter.join(value)+'\n'
|
|
781
888
|
if overWrite:
|
|
782
889
|
if self.verbose:
|
|
783
890
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
784
|
-
file.write(strToWrite)
|
|
891
|
+
file.write(strToWrite.encode(encoding=self.encoding))
|
|
785
892
|
continue
|
|
786
893
|
pos = file.tell()
|
|
787
|
-
line = file.readline()
|
|
894
|
+
line = file.readline().decode(encoding=self.encoding)
|
|
788
895
|
aftPos = file.tell()
|
|
789
896
|
if not line or pos == aftPos:
|
|
790
897
|
if self.verbose:
|
|
791
898
|
self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
|
|
792
|
-
file.write(strToWrite)
|
|
899
|
+
file.write(strToWrite.encode(encoding=self.encoding))
|
|
793
900
|
overWrite = True
|
|
794
901
|
continue
|
|
795
902
|
if line != strToWrite:
|
|
@@ -797,7 +904,8 @@ memoryOnly:{self.memoryOnly}
|
|
|
797
904
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
798
905
|
file.seek(pos)
|
|
799
906
|
# fill the string with space to write to the correct length
|
|
800
|
-
file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
907
|
+
#file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
908
|
+
file.write(strToWrite.encode(encoding=self.encoding).rstrip(b'\n').ljust(len(line)-1)+b'\n')
|
|
801
909
|
if aftPos != file.tell():
|
|
802
910
|
overWrite = True
|
|
803
911
|
file.truncate()
|
|
@@ -831,9 +939,10 @@ memoryOnly:{self.memoryOnly}
|
|
|
831
939
|
|
|
832
940
|
def _appendWorker(self):
|
|
833
941
|
while not self.shutdownEvent.is_set():
|
|
834
|
-
self.
|
|
835
|
-
|
|
836
|
-
|
|
942
|
+
if not self.memoryOnly:
|
|
943
|
+
self.checkExternalChanges()
|
|
944
|
+
self.rewrite()
|
|
945
|
+
self.commitAppendToFile()
|
|
837
946
|
time.sleep(self.append_check_delay)
|
|
838
947
|
# self.appendEvent.wait()
|
|
839
948
|
# self.appendEvent.clear()
|
|
@@ -883,15 +992,19 @@ memoryOnly:{self.memoryOnly}
|
|
|
883
992
|
def get_file_obj(self,modes = 'a'):
|
|
884
993
|
self.writeLock.acquire()
|
|
885
994
|
try:
|
|
886
|
-
if not
|
|
887
|
-
self.encoding
|
|
888
|
-
|
|
995
|
+
if 'b' not in modes:
|
|
996
|
+
if not self.encoding:
|
|
997
|
+
self.encoding = 'utf8'
|
|
998
|
+
file = open(self._fileName, mode=modes, encoding=self.encoding)
|
|
999
|
+
else:
|
|
1000
|
+
file = open(self._fileName, mode=modes)
|
|
889
1001
|
# Lock the file after opening
|
|
890
1002
|
if os.name == 'posix':
|
|
891
1003
|
fcntl.lockf(file, fcntl.LOCK_EX)
|
|
892
1004
|
elif os.name == 'nt':
|
|
893
1005
|
# For Windows, locking the entire file, avoiding locking an empty file
|
|
894
|
-
lock_length = max(1, os.path.getsize(self._fileName))
|
|
1006
|
+
#lock_length = max(1, os.path.getsize(self._fileName))
|
|
1007
|
+
lock_length = 2147483647
|
|
895
1008
|
msvcrt.locking(file.fileno(), msvcrt.LK_LOCK, lock_length)
|
|
896
1009
|
if self.verbose:
|
|
897
1010
|
self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
|
|
@@ -910,13 +1023,18 @@ memoryOnly:{self.memoryOnly}
|
|
|
910
1023
|
try:
|
|
911
1024
|
file.flush() # Ensure the file is flushed before unlocking
|
|
912
1025
|
os.fsync(file.fileno()) # Ensure the file is synced to disk before unlocking
|
|
913
|
-
if
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
1026
|
+
if not file.closed:
|
|
1027
|
+
if os.name == 'posix':
|
|
1028
|
+
fcntl.lockf(file, fcntl.LOCK_UN)
|
|
1029
|
+
elif os.name == 'nt':
|
|
1030
|
+
# Unlocking the entire file; for Windows, ensure not unlocking an empty file
|
|
1031
|
+
#unlock_length = max(1, os.path.getsize(os.path.realpath(file.name)))
|
|
1032
|
+
unlock_length = 2147483647
|
|
1033
|
+
try:
|
|
1034
|
+
msvcrt.locking(file.fileno(), msvcrt.LK_UNLCK, unlock_length)
|
|
1035
|
+
except:
|
|
1036
|
+
pass
|
|
1037
|
+
file.close() # Ensure file is closed after unlocking
|
|
920
1038
|
if self.verbose:
|
|
921
1039
|
self.__teePrintOrNot(f"File {file.name} unlocked / released")
|
|
922
1040
|
except Exception as e:
|
|
@@ -925,26 +1043,37 @@ memoryOnly:{self.memoryOnly}
|
|
|
925
1043
|
except Exception as e:
|
|
926
1044
|
self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
|
|
927
1045
|
self.__teePrintOrNot(f"Failed to release file {file.name}: {e}",'error')
|
|
928
|
-
|
|
929
|
-
self.
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
1046
|
+
import traceback
|
|
1047
|
+
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
1048
|
+
# release the write lock if not already released
|
|
1049
|
+
if self.writeLock.locked():
|
|
1050
|
+
try:
|
|
1051
|
+
self.writeLock.release() # Ensure the thread lock is always released
|
|
1052
|
+
except Exception as e:
|
|
1053
|
+
self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
|
|
1054
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
933
1055
|
|
|
934
1056
|
|
|
935
1057
|
def __main__():
|
|
936
1058
|
import argparse
|
|
937
|
-
parser = argparse.ArgumentParser(description='TSVZed: A TSV file manager')
|
|
938
|
-
parser.add_argument('filename', type=str, help='The
|
|
1059
|
+
parser = argparse.ArgumentParser(description='TSVZed: A TSV / CSV / NSV file manager')
|
|
1060
|
+
parser.add_argument('filename', type=str, help='The file to read')
|
|
939
1061
|
parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
|
|
940
|
-
parser.add_argument('line', type=str, nargs='*', help='The line to append to the
|
|
941
|
-
parser.add_argument('-
|
|
1062
|
+
parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
|
|
1063
|
+
parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
|
|
1064
|
+
parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
|
|
942
1065
|
parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
|
|
943
1066
|
parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
|
|
944
1067
|
parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
|
|
945
1068
|
args = parser.parse_args()
|
|
946
|
-
|
|
947
|
-
header
|
|
1069
|
+
args.delimiter = get_delimiter(delimiter=args.delimiter,file_name=args.filename)
|
|
1070
|
+
if args.header and args.header.endswith('\\'):
|
|
1071
|
+
args.header += '\\'
|
|
1072
|
+
try:
|
|
1073
|
+
header = args.header.encode().decode('unicode_escape') if args.header else ''
|
|
1074
|
+
except Exception as e:
|
|
1075
|
+
print(f"Failed to decode header: {args.header}")
|
|
1076
|
+
header = ''
|
|
948
1077
|
|
|
949
1078
|
if args.operation == 'read':
|
|
950
1079
|
# check if the file exist
|
|
@@ -952,14 +1081,14 @@ def __main__():
|
|
|
952
1081
|
print(f"File not found: {args.filename}")
|
|
953
1082
|
return
|
|
954
1083
|
# read the file
|
|
955
|
-
data =
|
|
956
|
-
print(pretty_format_table(data.values()))
|
|
1084
|
+
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= not args.force, delimiter=args.delimiter)
|
|
1085
|
+
print(pretty_format_table(data.values(),delimiter=args.delimiter))
|
|
957
1086
|
elif args.operation == 'append':
|
|
958
|
-
|
|
1087
|
+
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
|
|
959
1088
|
elif args.operation == 'delete':
|
|
960
|
-
|
|
1089
|
+
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
|
|
961
1090
|
elif args.operation == 'clear':
|
|
962
|
-
|
|
1091
|
+
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=not args.force, delimiter=args.delimiter)
|
|
963
1092
|
else:
|
|
964
1093
|
print("Invalid operation")
|
|
965
1094
|
return
|