TSVZ 2.67__py3-none-any.whl → 3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {TSVZ-2.67.dist-info → TSVZ-3.2.dist-info}/METADATA +57 -48
- TSVZ-3.2.dist-info/RECORD +6 -0
- {TSVZ-2.67.dist-info → TSVZ-3.2.dist-info}/WHEEL +1 -1
- {TSVZ-2.67.dist-info → TSVZ-3.2.dist-info}/entry_points.txt +0 -0
- {TSVZ-2.67.dist-info → TSVZ-3.2.dist-info}/top_level.txt +0 -0
- TSVZ.py +284 -124
- TSVZ-2.67.dist-info/LICENSE +0 -674
- TSVZ-2.67.dist-info/RECORD +0 -7
TSVZ.py
CHANGED
|
@@ -4,39 +4,90 @@ from collections import OrderedDict , deque
|
|
|
4
4
|
import time
|
|
5
5
|
import atexit
|
|
6
6
|
import threading
|
|
7
|
+
import re
|
|
7
8
|
|
|
8
9
|
if os.name == 'nt':
|
|
9
10
|
import msvcrt
|
|
10
11
|
elif os.name == 'posix':
|
|
11
12
|
import fcntl
|
|
12
13
|
|
|
13
|
-
version = '
|
|
14
|
+
version = '3.02'
|
|
14
15
|
author = 'pan@zopyr.us'
|
|
15
16
|
|
|
17
|
+
DEFAULT_DELIMITER = '\t'
|
|
16
18
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
19
|
+
def get_delimiter(delimiter,file_name = ''):
    r"""
    Resolve a delimiter specification into the actual delimiter string.

    Parameters:
    - delimiter: What the caller asked for. Accepted forms:
        - a falsy value (None, ''): the current module-level DEFAULT_DELIMITER is returned unchanged.
        - ... (Ellipsis): infer the delimiter from the extension of file_name
          ('.csv' -> ',', '.nsv' -> '\0', '.psv' -> '|', anything else -> '\t').
        - 'comma', 'tab', 'pipe' or 'null': the named delimiter.
        - any other string: decoded with 'unicode_escape', so a literal backslash
          sequence such as the two characters '\t' becomes a real tab.
    - file_name (str, optional): File name used only for extension based inference. Defaults to ''.

    Returns:
    - str: The resolved delimiter.

    Side effects:
    - Every delimiter resolved from a non-falsy specification is stored in the
      module-level DEFAULT_DELIMITER, so a later call with a falsy delimiter
      returns the most recently resolved one. Parameter defaults written as
      `delimiter = DEFAULT_DELIMITER` elsewhere are evaluated at definition time
      and are not affected by this.
    """
    # Without this declaration the assignment at the bottom makes
    # DEFAULT_DELIMITER a local name, and the early return below fails with
    # UnboundLocalError instead of returning the module default.
    global DEFAULT_DELIMITER
    if not delimiter:
        return DEFAULT_DELIMITER
    if delimiter is ...:
        # Infer from the file extension; unknown or missing names fall back to tab.
        rtn = '\t'
        if file_name:
            for extension, inferred in (('.csv', ','), ('.nsv', '\0'), ('.psv', '|')):
                if file_name.endswith(extension):
                    rtn = inferred
                    break
    elif delimiter == 'comma':
        rtn = ','
    elif delimiter == 'tab':
        rtn = '\t'
    elif delimiter == 'pipe':
        rtn = '|'
    elif delimiter == 'null':
        rtn = '\0'
    else:
        # Allow escape sequences typed literally (e.g. on a command line).
        rtn = delimiter.encode().decode('unicode_escape')
    DEFAULT_DELIMITER = rtn
    return rtn
|
|
45
|
+
|
|
46
|
+
# Matches ANSI CSI escape sequences (colours, cursor movement, ...), which take
# no space on screen and must not count towards a column's width.
_ANSI_ESCAPE_PATTERN = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')

def pretty_format_table(data, delimiter = DEFAULT_DELIMITER):
    """
    Render tabular data as an aligned, human readable text table.

    The first row is treated as the header and is followed by a divider line.
    Columns are left aligned, padded to the widest visible cell and joined
    with ' | '. A data row whose cells are all empty is rendered as a divider.

    Parameters:
    - data: The table to render. Accepted forms:
        - str: lines separated by newlines, cells separated by delimiter.
        - dict of dicts: a 'key' column is added and the keys of the first value become the header.
        - dict of iterables: each row is the key followed by the items of its value.
        - iterable of dicts: the keys of the first item become the header.
        - iterable of rows (iterables of cells).
    - delimiter (str, optional): Cell separator, used only when data is a string. Defaults to DEFAULT_DELIMITER.

    Returns:
    - str: The formatted table terminated by a newline, or '' when data is empty.

    Notes:
    - The number of columns is taken from the header row. Shorter rows are
      padded with empty cells; cells beyond the header width are ignored.
    - ANSI escape sequences are excluded both when measuring and when padding,
      so coloured cells stay aligned.
    """
    if not data:
        return ''
    if isinstance(data, str):
        data = [line.split(delimiter) for line in data.strip('\n').split('\n')]
    elif isinstance(data, dict):
        first_value = next(iter(data.values()))
        if isinstance(first_value, dict):
            # flatten the 2D dict to a list of lists, with the outer key as first column
            rows = [['key'] + list(first_value.keys())]
            rows.extend([key] + list(value.values()) for key, value in data.items())
            data = rows
        else:
            # it is a dict of lists
            data = [[key] + list(value) for key, value in data.items()]
    elif not isinstance(data, list):
        data = list(data)
        # generators and other lazy iterables are always truthy, so emptiness
        # can only be detected after materialising them
        if not data:
            return ''
    if isinstance(data[0], dict):
        rows = [list(data[0].keys())]
        rows.extend(list(item.values()) for item in data)
        data = rows
    data = [[str(item) for item in row] for row in data]
    num_cols = len(data[0])

    def visible_len(text):
        return len(_ANSI_ESCAPE_PATTERN.sub('', text))

    # Calculate the maximum visible width of each column
    col_widths = [
        max((visible_len(row[c]) for row in data if c < len(row)), default = 0)
        for c in range(num_cols)
    ]
    divider = '-+-'.join('-' * width for width in col_widths)

    def format_row(row):
        cells = row[:num_cols] + [''] * (num_cols - len(row))
        # pad by visible length; str.format would count the escape characters
        return ' | '.join(cell + ' ' * (width - visible_len(cell)) for cell, width in zip(cells, col_widths))

    outTable = [format_row(data[0]), divider]
    for row in data[1:]:
        # if the row is empty, print a divider
        outTable.append(format_row(row) if any(row) else divider)
    return '\n'.join(outTable) + '\n'
|
|
40
91
|
|
|
41
92
|
def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
42
93
|
"""
|
|
@@ -58,7 +109,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
58
109
|
except Exception as e:
|
|
59
110
|
print(message,flush=True)
|
|
60
111
|
|
|
61
|
-
def
|
|
112
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
|
|
62
113
|
"""
|
|
63
114
|
Process a line of text and update the task dictionary.
|
|
64
115
|
|
|
@@ -74,7 +125,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
74
125
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
75
126
|
|
|
76
127
|
"""
|
|
77
|
-
line = line.
|
|
128
|
+
line = line.strip(' ').strip('\x00').rstrip('\r\n')
|
|
78
129
|
# we throw away the lines that start with '#'
|
|
79
130
|
if not line :
|
|
80
131
|
if verbose:
|
|
@@ -85,7 +136,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
85
136
|
__teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
|
|
86
137
|
return correctColumnNum , []
|
|
87
138
|
# we only interested in the lines that have the correct number of columns
|
|
88
|
-
lineCache = [segment.strip() for segment in line.split(
|
|
139
|
+
lineCache = [segment.strip() for segment in line.split(delimiter)]
|
|
89
140
|
if not lineCache:
|
|
90
141
|
return correctColumnNum , []
|
|
91
142
|
if correctColumnNum == -1:
|
|
@@ -124,7 +175,7 @@ def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,s
|
|
|
124
175
|
__teePrintOrNot(f"Key {lineCache[0]} added after correction",teeLogger=teeLogger)
|
|
125
176
|
return correctColumnNum, lineCache
|
|
126
177
|
|
|
127
|
-
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False):
|
|
178
|
+
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...):
|
|
128
179
|
"""
|
|
129
180
|
Reads the last valid line from a file.
|
|
130
181
|
|
|
@@ -134,6 +185,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
134
185
|
correctColumnNum (int): A column number to pass to processLine function.
|
|
135
186
|
verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
136
187
|
teeLogger (optional): Logger to use for tee print. Defaults to None.
|
|
188
|
+
encoding (str, optional): The encoding of the file. Defaults to None.
|
|
137
189
|
strict (bool, optional): Whether to enforce strict processing. Defaults to False.
|
|
138
190
|
|
|
139
191
|
Returns:
|
|
@@ -141,6 +193,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
141
193
|
"""
|
|
142
194
|
chunk_size = 1024 # Read in chunks of 1024 bytes
|
|
143
195
|
last_valid_line = []
|
|
196
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
144
197
|
if verbose:
|
|
145
198
|
__teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
|
|
146
199
|
with open(fileName, 'rb') as file:
|
|
@@ -166,13 +219,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
166
219
|
for i in range(len(lines) - 1, -1, -1):
|
|
167
220
|
if lines[i].strip(): # Skip empty lines
|
|
168
221
|
# Process the line
|
|
169
|
-
correctColumnNum, lineCache =
|
|
170
|
-
lines[i],
|
|
222
|
+
correctColumnNum, lineCache = _processLine(
|
|
223
|
+
lines[i].decode(encoding=encoding),
|
|
171
224
|
taskDic,
|
|
172
225
|
correctColumnNum,
|
|
173
226
|
verbose=verbose,
|
|
174
227
|
teeLogger=teeLogger,
|
|
175
|
-
strict=strict
|
|
228
|
+
strict=strict,
|
|
229
|
+
delimiter=delimiter
|
|
176
230
|
)
|
|
177
231
|
# If the line is valid, return it
|
|
178
232
|
if lineCache and any(lineCache):
|
|
@@ -184,7 +238,7 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
184
238
|
# Return empty list if no valid line found
|
|
185
239
|
return last_valid_line
|
|
186
240
|
|
|
187
|
-
def
|
|
241
|
+
def _formatHeader(header,verbose = False,teeLogger = None,delimiter = DEFAULT_DELIMITER):
|
|
188
242
|
"""
|
|
189
243
|
Format the header string.
|
|
190
244
|
|
|
@@ -198,12 +252,12 @@ def formatHeader(header,verbose = False,teeLogger = None):
|
|
|
198
252
|
"""
|
|
199
253
|
if type(header) != str:
|
|
200
254
|
try:
|
|
201
|
-
header =
|
|
255
|
+
header = delimiter.join(header)
|
|
202
256
|
except:
|
|
203
257
|
if verbose:
|
|
204
258
|
__teePrintOrNot('Invalid header, setting header to empty.','error',teeLogger=teeLogger)
|
|
205
259
|
header = ''
|
|
206
|
-
header = header.
|
|
260
|
+
header = delimiter.join([segment.rstrip() for segment in header.split(delimiter)])
|
|
207
261
|
# if header:
|
|
208
262
|
# if not header.endswith('\n'):
|
|
209
263
|
# header += '\n'
|
|
@@ -211,7 +265,7 @@ def formatHeader(header,verbose = False,teeLogger = None):
|
|
|
211
265
|
# header = ''
|
|
212
266
|
return header
|
|
213
267
|
|
|
214
|
-
def
|
|
268
|
+
def _lineContainHeader(header,line,verbose = False,teeLogger = None,strict = False,delimiter = DEFAULT_DELIMITER):
|
|
215
269
|
"""
|
|
216
270
|
Verify if a line contains the header.
|
|
217
271
|
|
|
@@ -225,22 +279,24 @@ def lineContainHeader(header,line,verbose = False,teeLogger = None,strict = Fals
|
|
|
225
279
|
Returns:
|
|
226
280
|
bool: True if the header matches the line, False otherwise.
|
|
227
281
|
"""
|
|
282
|
+
header = [segment.rstrip() for segment in header.split(delimiter)]
|
|
283
|
+
line = [segment.rstrip() for segment in line.split(delimiter)]
|
|
228
284
|
if verbose:
|
|
229
|
-
__teePrintOrNot(f"Header: {header
|
|
230
|
-
__teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
|
|
231
|
-
if
|
|
232
|
-
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header
|
|
285
|
+
__teePrintOrNot(f"Header: \n{header}",teeLogger=teeLogger)
|
|
286
|
+
__teePrintOrNot(f"First line: \n{line}",teeLogger=teeLogger)
|
|
287
|
+
if len(header) != len(line) or any([header[i] not in line[i] for i in range(len(header))]):
|
|
288
|
+
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header}",teeLogger=teeLogger)
|
|
233
289
|
if strict:
|
|
234
290
|
raise Exception("Data format error! Header mismatch")
|
|
235
291
|
return False
|
|
236
292
|
return True
|
|
237
293
|
|
|
238
|
-
def
|
|
294
|
+
def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,header = '',encoding = 'utf8',strict = True,delimiter = DEFAULT_DELIMITER):
|
|
239
295
|
"""
|
|
240
|
-
Verify the existence of
|
|
296
|
+
Verify the existence of the tabular file.
|
|
241
297
|
|
|
242
298
|
Parameters:
|
|
243
|
-
- fileName (str): The path of the
|
|
299
|
+
- fileName (str): The path of the tabular file.
|
|
244
300
|
- createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to True.
|
|
245
301
|
- teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
|
|
246
302
|
- header (str, optional): The header line to verify against. Defaults to ''.
|
|
@@ -250,8 +306,14 @@ def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header
|
|
|
250
306
|
Returns:
|
|
251
307
|
bool: True if the file exists, False otherwise.
|
|
252
308
|
"""
|
|
253
|
-
if not fileName.endswith('.tsv'):
|
|
309
|
+
if delimiter and delimiter == '\t' and not fileName.endswith('.tsv'):
|
|
254
310
|
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
|
|
311
|
+
elif delimiter and delimiter == ',' and not fileName.endswith('.csv'):
|
|
312
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .csv','warning',teeLogger=teeLogger)
|
|
313
|
+
elif delimiter and delimiter == '\0' and not fileName.endswith('.nsv'):
|
|
314
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .nsv','warning',teeLogger=teeLogger)
|
|
315
|
+
elif delimiter and delimiter == '|' and not fileName.endswith('.psv'):
|
|
316
|
+
__teePrintOrNot(f'Warning: Filename {fileName} does not end with .psv','warning',teeLogger=teeLogger)
|
|
255
317
|
if not os.path.isfile(fileName):
|
|
256
318
|
if createIfNotExist:
|
|
257
319
|
with open(fileName, mode ='w',encoding=encoding)as file:
|
|
@@ -265,14 +327,41 @@ def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header
|
|
|
265
327
|
return False
|
|
266
328
|
return True
|
|
267
329
|
|
|
268
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
|
|
330
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
    r"""
    Compatibility wrapper around readTabularFile that defaults to a tab delimiter.

    Every argument is forwarded unchanged to readTabularFile.

    Parameters:
    - fileName (str): The path to the Tabular file.
    - teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
    - header (str or list, optional): The header of the Tabular file, either as a delimiter-separated string or as a list of column names. Defaults to ''.
    - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
    - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
    - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
    - verbose (bool, optional): Whether to print verbose output. Defaults to False.
    - taskDic (dict, optional): The dictionary to store the data in. Defaults to None.
    - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
    - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
    - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.

    Returns:
    - Whatever readTabularFile returns for these arguments.

    Raises:
    - Exception: Propagated from readTabularFile.
    """
    forwarded = {
        'teeLogger': teeLogger,
        'header': header,
        'createIfNotExist': createIfNotExist,
        'lastLineOnly': lastLineOnly,
        'verifyHeader': verifyHeader,
        'verbose': verbose,
        'taskDic': taskDic,
        'encoding': encoding,
        'strict': strict,
        'delimiter': delimiter,
    }
    return readTabularFile(fileName, **forwarded)
|
|
356
|
+
|
|
357
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...):
|
|
358
|
+
"""
|
|
359
|
+
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
271
360
|
|
|
272
361
|
Parameters:
|
|
273
|
-
- fileName (str): The path to the
|
|
362
|
+
- fileName (str): The path to the Tabular file.
|
|
274
363
|
- teeLogger (Logger, optional): The logger object to log messages. Defaults to None.
|
|
275
|
-
- header (str or list, optional): The header of the
|
|
364
|
+
- header (str or list, optional): The header of the Tabular file. If a string, it should be a tab-separated list of column names. If a list, it should contain the column names. Defaults to ''.
|
|
276
365
|
- createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to False.
|
|
277
366
|
- lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
|
|
278
367
|
- verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
|
|
@@ -280,9 +369,10 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
280
369
|
- taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
|
|
281
370
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
282
371
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
372
|
+
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
283
373
|
|
|
284
374
|
Returns:
|
|
285
|
-
- OrderedDict: The dictionary containing the data from the
|
|
375
|
+
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
286
376
|
|
|
287
377
|
Raises:
|
|
288
378
|
- Exception: If the file is not found or there is a data format error.
|
|
@@ -290,33 +380,55 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
290
380
|
"""
|
|
291
381
|
if taskDic is None:
|
|
292
382
|
taskDic = {}
|
|
293
|
-
|
|
294
|
-
|
|
383
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
384
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
|
|
385
|
+
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
295
386
|
return taskDic
|
|
296
387
|
with open(fileName, mode ='rb')as file:
|
|
297
388
|
correctColumnNum = -1
|
|
298
|
-
if header.
|
|
389
|
+
if header.rstrip():
|
|
299
390
|
if verifyHeader:
|
|
300
|
-
line = file.readline().decode()
|
|
301
|
-
if
|
|
302
|
-
correctColumnNum = len(header.
|
|
391
|
+
line = file.readline().decode(encoding=encoding)
|
|
392
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
393
|
+
correctColumnNum = len(header.split(delimiter))
|
|
303
394
|
if verbose:
|
|
304
395
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
305
396
|
if lastLineOnly:
|
|
306
|
-
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
|
|
397
|
+
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
|
|
307
398
|
if lineCache:
|
|
308
399
|
taskDic[lineCache[0]] = lineCache
|
|
309
400
|
return lineCache
|
|
310
401
|
for line in file:
|
|
311
|
-
correctColumnNum, lineCache =
|
|
402
|
+
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
|
|
312
403
|
return taskDic
|
|
313
404
|
|
|
314
|
-
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True):
|
|
405
|
+
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
    r"""
    Compatibility wrapper around appendTabularFile that defaults to a tab delimiter.

    Every argument is forwarded unchanged to appendTabularFile.

    Parameters:
    - fileName (str): The path of the Tabular file.
    - lineToAppend (str or list): The line of data to append. If it is a string, it will be split by delimiter to form a list.
    - teeLogger (optional): A logger object for logging messages.
    - header (str, optional): The header line to verify against.
    - createIfNotExist (bool, optional): Whether the file should be created if it does not exist. Defaults to False.
    - verifyHeader (bool, optional): Whether to check the existing header against the provided header. Defaults to True.
    - verbose (bool, optional): If True, additional information will be printed during the execution.
    - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
    - strict (bool, optional): Whether data format errors should raise instead of being ignored. Defaults to True.
    - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.

    Returns:
    - Whatever appendTabularFile returns for these arguments.

    Raises:
    - Exception: Propagated from appendTabularFile.
    """
    forwarded = {
        'teeLogger': teeLogger,
        'header': header,
        'createIfNotExist': createIfNotExist,
        'verifyHeader': verifyHeader,
        'verbose': verbose,
        'encoding': encoding,
        'strict': strict,
        'delimiter': delimiter,
    }
    return appendTabularFile(fileName, lineToAppend, **forwarded)
|
|
425
|
+
|
|
426
|
+
def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = ...):
|
|
315
427
|
"""
|
|
316
|
-
Append a line of data to a
|
|
428
|
+
Append a line of data to a Tabular file.
|
|
317
429
|
Parameters:
|
|
318
|
-
- fileName (str): The path of the
|
|
319
|
-
- lineToAppend (str or list): The line of data to append. If it is a string, it will be split by
|
|
430
|
+
- fileName (str): The path of the Tabular file.
|
|
431
|
+
- lineToAppend (str or list): The line of data to append. If it is a string, it will be split by delimiter to form a list.
|
|
320
432
|
- teeLogger (optional): A logger object for logging messages.
|
|
321
433
|
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
322
434
|
- createIfNotExist (bool, optional): If True, the file will be created if it does not exist. If False and the file does not exist, an exception will be raised.
|
|
@@ -324,23 +436,32 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
324
436
|
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
325
437
|
- encoding (str, optional): The encoding of the file.
|
|
326
438
|
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
439
|
+
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
327
440
|
Raises:
|
|
328
441
|
- Exception: If the file does not exist and createIfNotExist is False.
|
|
329
442
|
- Exception: If the existing header does not match the provided header.
|
|
330
443
|
"""
|
|
331
|
-
|
|
332
|
-
|
|
444
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
445
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
|
|
446
|
+
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
333
447
|
return
|
|
334
448
|
if type(lineToAppend) == str:
|
|
335
|
-
lineToAppend = lineToAppend.strip().split(
|
|
449
|
+
lineToAppend = lineToAppend.strip().split(delimiter)
|
|
450
|
+
else:
|
|
451
|
+
for i in range(len(lineToAppend)):
|
|
452
|
+
if type(lineToAppend[i]) != str:
|
|
453
|
+
try:
|
|
454
|
+
lineToAppend[i] = str(lineToAppend[i])
|
|
455
|
+
except Exception as e:
|
|
456
|
+
lineToAppend[i] = str(e)
|
|
336
457
|
|
|
337
458
|
with open(fileName, mode ='r+b')as file:
|
|
338
459
|
correctColumnNum = len(lineToAppend)
|
|
339
|
-
if header.
|
|
460
|
+
if header.rstrip():
|
|
340
461
|
if verifyHeader:
|
|
341
|
-
line = file.readline().decode()
|
|
342
|
-
if
|
|
343
|
-
correctColumnNum = len(header.
|
|
462
|
+
line = file.readline().decode(encoding=encoding)
|
|
463
|
+
if _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
464
|
+
correctColumnNum = len(header.split(delimiter))
|
|
344
465
|
if verbose:
|
|
345
466
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
346
467
|
# truncate / fill the lineToAppend to the correct number of columns
|
|
@@ -352,15 +473,16 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
352
473
|
file.seek(-1, os.SEEK_END)
|
|
353
474
|
if file.read(1) != b'\n':
|
|
354
475
|
file.write(b'\n')
|
|
355
|
-
file.write(
|
|
476
|
+
file.write(get_delimiter(delimiter).join(lineToAppend).encode(encoding=encoding) + b'\n')
|
|
356
477
|
if verbose:
|
|
357
478
|
__teePrintOrNot(f"Appended {lineToAppend} to {fileName}",teeLogger=teeLogger)
|
|
358
479
|
|
|
359
|
-
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False):
|
|
480
|
+
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False,delimiter = '\t'):
|
|
360
481
|
"""
|
|
361
|
-
|
|
482
|
+
Compatibility method, calls clearTabularFile.
|
|
483
|
+
Clear the contents of a Tabular file. Will create if not exist.
|
|
362
484
|
Parameters:
|
|
363
|
-
- fileName (str): The path of the
|
|
485
|
+
- fileName (str): The path of the Tabular file.
|
|
364
486
|
- teeLogger (optional): A logger object for logging messages.
|
|
365
487
|
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
366
488
|
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
@@ -368,14 +490,29 @@ def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose
|
|
|
368
490
|
- encoding (str, optional): The encoding of the file.
|
|
369
491
|
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
370
492
|
"""
|
|
371
|
-
header =
|
|
372
|
-
|
|
493
|
+
return clearTabularFile(fileName,teeLogger = teeLogger,header = header,verifyHeader = verifyHeader,verbose = verbose,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
494
|
+
|
|
495
|
+
def clearTabularFile(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False,delimiter = ...):
|
|
496
|
+
"""
|
|
497
|
+
Clear the contents of a Tabular file. Will create if not exist.
|
|
498
|
+
Parameters:
|
|
499
|
+
- fileName (str): The path of the Tabular file.
|
|
500
|
+
- teeLogger (optional): A logger object for logging messages.
|
|
501
|
+
- header (str, optional): The header line to verify against. If provided, the function will check if the existing header matches the provided header.
|
|
502
|
+
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
503
|
+
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
504
|
+
- encoding (str, optional): The encoding of the file.
|
|
505
|
+
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
506
|
+
"""
|
|
507
|
+
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
508
|
+
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger,delimiter=delimiter)
|
|
509
|
+
if not _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False,delimiter=delimiter):
|
|
373
510
|
raise Exception("Something catastrophic happened! File still not found after creation")
|
|
374
511
|
else:
|
|
375
512
|
with open(fileName, mode ='r+',encoding=encoding)as file:
|
|
376
|
-
if header.
|
|
377
|
-
line = file.readline()
|
|
378
|
-
if not
|
|
513
|
+
if header.rstrip() and verifyHeader:
|
|
514
|
+
line = file.readline()
|
|
515
|
+
if not _lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
379
516
|
__teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
|
|
380
517
|
file.truncate()
|
|
381
518
|
else:
|
|
@@ -411,14 +548,15 @@ class TSVZed(OrderedDict):
|
|
|
411
548
|
except Exception as e:
|
|
412
549
|
print(message,flush=True)
|
|
413
550
|
|
|
414
|
-
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding =
|
|
551
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...):
|
|
415
552
|
super().__init__()
|
|
416
553
|
self.version = version
|
|
417
554
|
self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
|
|
418
555
|
self.lastUpdateTime = self.externalFileUpdateTime
|
|
419
556
|
self._fileName = fileName
|
|
420
557
|
self.teeLogger = teeLogger
|
|
421
|
-
self.
|
|
558
|
+
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
559
|
+
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
422
560
|
self.correctColumnNum = -1
|
|
423
561
|
self.createIfNotExist = createIfNotExist
|
|
424
562
|
self.verifyHeader = verifyHeader
|
|
@@ -459,10 +597,10 @@ class TSVZed(OrderedDict):
|
|
|
459
597
|
if self.verbose:
|
|
460
598
|
self.__teePrintOrNot(f"Loading {self._fileName}")
|
|
461
599
|
super().clear()
|
|
462
|
-
|
|
600
|
+
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = False, delimiter = self.delimiter)
|
|
463
601
|
if self.verbose:
|
|
464
602
|
self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
|
|
465
|
-
self.correctColumnNum = len(self.header.split(
|
|
603
|
+
self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
|
|
466
604
|
if self.verbose:
|
|
467
605
|
self.__teePrintOrNot(f"correctColumnNum: {self.correctColumnNum}")
|
|
468
606
|
#super().update(loadedData)
|
|
@@ -479,7 +617,7 @@ class TSVZed(OrderedDict):
|
|
|
479
617
|
self.__teePrintOrNot('Key cannot be empty','error')
|
|
480
618
|
return
|
|
481
619
|
if type(value) == str:
|
|
482
|
-
value = value.strip().split(
|
|
620
|
+
value = value.strip().split(self.delimiter)
|
|
483
621
|
# sanitize the value
|
|
484
622
|
value = [(str(segment).strip() if type(segment) != str else segment.strip()) if segment else '' for segment in value]
|
|
485
623
|
#value = list(map(lambda segment: str(segment).strip(), value))
|
|
@@ -512,7 +650,7 @@ class TSVZed(OrderedDict):
|
|
|
512
650
|
return
|
|
513
651
|
if self.verbose:
|
|
514
652
|
self.__teePrintOrNot(f"Appending {key} to the appendQueue")
|
|
515
|
-
self.appendQueue.append(
|
|
653
|
+
self.appendQueue.append(self.delimiter.join(value))
|
|
516
654
|
self.lastUpdateTime = get_time_ns()
|
|
517
655
|
# if not self.appendThread.is_alive():
|
|
518
656
|
# self.commitAppendToFile()
|
|
@@ -536,10 +674,10 @@ class TSVZed(OrderedDict):
|
|
|
536
674
|
def __appendEmptyLine(self,key):
|
|
537
675
|
self.dirty = True
|
|
538
676
|
if self.correctColumnNum > 0:
|
|
539
|
-
emptyLine = key+
|
|
677
|
+
emptyLine = key+self.delimiter*(self.correctColumnNum-1)
|
|
540
678
|
elif len(self[key]) > 1:
|
|
541
679
|
self.correctColumnNum = len(self[key])
|
|
542
|
-
emptyLine = key+
|
|
680
|
+
emptyLine = key+self.delimiter*(self.correctColumnNum-1)
|
|
543
681
|
else:
|
|
544
682
|
emptyLine = key
|
|
545
683
|
if self.verbose:
|
|
@@ -714,7 +852,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
714
852
|
if self.header:
|
|
715
853
|
file.write(self.header+'\n')
|
|
716
854
|
for key in self:
|
|
717
|
-
file.write(
|
|
855
|
+
file.write(self.delimiter.join(self[key])+'\n')
|
|
718
856
|
self.release_file_obj(file)
|
|
719
857
|
if self.verbose:
|
|
720
858
|
self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
|
|
@@ -733,32 +871,32 @@ memoryOnly:{self.memoryOnly}
|
|
|
733
871
|
try:
|
|
734
872
|
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
735
873
|
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
736
|
-
file = self.get_file_obj('r+')
|
|
874
|
+
file = self.get_file_obj('r+b')
|
|
737
875
|
overWrite = False
|
|
738
|
-
line = file.readline()
|
|
876
|
+
line = file.readline().decode(self.encoding)
|
|
739
877
|
aftPos = file.tell()
|
|
740
|
-
if self.header and not
|
|
878
|
+
if self.header and not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
|
|
741
879
|
file.seek(0)
|
|
742
|
-
file.write(self.header
|
|
880
|
+
file.write(f'{self.header}\n'.encode(encoding=self.encoding))
|
|
743
881
|
# if the header is not the same length as the line, we need to overwrite the file
|
|
744
882
|
if aftPos != file.tell():
|
|
745
883
|
overWrite = True
|
|
746
884
|
if self.verbose:
|
|
747
885
|
self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
|
|
748
886
|
for value in self.values():
|
|
749
|
-
strToWrite =
|
|
887
|
+
strToWrite = self.delimiter.join(value)+'\n'
|
|
750
888
|
if overWrite:
|
|
751
889
|
if self.verbose:
|
|
752
890
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
753
|
-
file.write(strToWrite)
|
|
891
|
+
file.write(strToWrite.encode(encoding=self.encoding))
|
|
754
892
|
continue
|
|
755
893
|
pos = file.tell()
|
|
756
|
-
line = file.readline()
|
|
894
|
+
line = file.readline().decode(encoding=self.encoding)
|
|
757
895
|
aftPos = file.tell()
|
|
758
896
|
if not line or pos == aftPos:
|
|
759
897
|
if self.verbose:
|
|
760
898
|
self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
|
|
761
|
-
file.write(strToWrite)
|
|
899
|
+
file.write(strToWrite.encode(encoding=self.encoding))
|
|
762
900
|
overWrite = True
|
|
763
901
|
continue
|
|
764
902
|
if line != strToWrite:
|
|
@@ -766,7 +904,8 @@ memoryOnly:{self.memoryOnly}
|
|
|
766
904
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
767
905
|
file.seek(pos)
|
|
768
906
|
# fill the string with space to write to the correct length
|
|
769
|
-
file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
907
|
+
#file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
908
|
+
file.write(strToWrite.encode(encoding=self.encoding).rstrip(b'\n').ljust(len(line)-1)+b'\n')
|
|
770
909
|
if aftPos != file.tell():
|
|
771
910
|
overWrite = True
|
|
772
911
|
file.truncate()
|
|
@@ -800,9 +939,10 @@ memoryOnly:{self.memoryOnly}
|
|
|
800
939
|
|
|
801
940
|
def _appendWorker(self):
|
|
802
941
|
while not self.shutdownEvent.is_set():
|
|
803
|
-
self.
|
|
804
|
-
|
|
805
|
-
|
|
942
|
+
if not self.memoryOnly:
|
|
943
|
+
self.checkExternalChanges()
|
|
944
|
+
self.rewrite()
|
|
945
|
+
self.commitAppendToFile()
|
|
806
946
|
time.sleep(self.append_check_delay)
|
|
807
947
|
# self.appendEvent.wait()
|
|
808
948
|
# self.appendEvent.clear()
|
|
@@ -852,15 +992,19 @@ memoryOnly:{self.memoryOnly}
|
|
|
852
992
|
def get_file_obj(self,modes = 'a'):
|
|
853
993
|
self.writeLock.acquire()
|
|
854
994
|
try:
|
|
855
|
-
if not
|
|
856
|
-
self.encoding
|
|
857
|
-
|
|
995
|
+
if 'b' not in modes:
|
|
996
|
+
if not self.encoding:
|
|
997
|
+
self.encoding = 'utf8'
|
|
998
|
+
file = open(self._fileName, mode=modes, encoding=self.encoding)
|
|
999
|
+
else:
|
|
1000
|
+
file = open(self._fileName, mode=modes)
|
|
858
1001
|
# Lock the file after opening
|
|
859
1002
|
if os.name == 'posix':
|
|
860
1003
|
fcntl.lockf(file, fcntl.LOCK_EX)
|
|
861
1004
|
elif os.name == 'nt':
|
|
862
1005
|
# For Windows, locking the entire file, avoiding locking an empty file
|
|
863
|
-
lock_length = max(1, os.path.getsize(self._fileName))
|
|
1006
|
+
#lock_length = max(1, os.path.getsize(self._fileName))
|
|
1007
|
+
lock_length = 2147483647
|
|
864
1008
|
msvcrt.locking(file.fileno(), msvcrt.LK_LOCK, lock_length)
|
|
865
1009
|
if self.verbose:
|
|
866
1010
|
self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
|
|
@@ -879,13 +1023,18 @@ memoryOnly:{self.memoryOnly}
|
|
|
879
1023
|
try:
|
|
880
1024
|
file.flush() # Ensure the file is flushed before unlocking
|
|
881
1025
|
os.fsync(file.fileno()) # Ensure the file is synced to disk before unlocking
|
|
882
|
-
if
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
1026
|
+
if not file.closed:
|
|
1027
|
+
if os.name == 'posix':
|
|
1028
|
+
fcntl.lockf(file, fcntl.LOCK_UN)
|
|
1029
|
+
elif os.name == 'nt':
|
|
1030
|
+
# Unlocking the entire file; for Windows, ensure not unlocking an empty file
|
|
1031
|
+
#unlock_length = max(1, os.path.getsize(os.path.realpath(file.name)))
|
|
1032
|
+
unlock_length = 2147483647
|
|
1033
|
+
try:
|
|
1034
|
+
msvcrt.locking(file.fileno(), msvcrt.LK_UNLCK, unlock_length)
|
|
1035
|
+
except:
|
|
1036
|
+
pass
|
|
1037
|
+
file.close() # Ensure file is closed after unlocking
|
|
889
1038
|
if self.verbose:
|
|
890
1039
|
self.__teePrintOrNot(f"File {file.name} unlocked / released")
|
|
891
1040
|
except Exception as e:
|
|
@@ -894,26 +1043,37 @@ memoryOnly:{self.memoryOnly}
|
|
|
894
1043
|
except Exception as e:
|
|
895
1044
|
self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
|
|
896
1045
|
self.__teePrintOrNot(f"Failed to release file {file.name}: {e}",'error')
|
|
897
|
-
|
|
898
|
-
self.
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
1046
|
+
import traceback
|
|
1047
|
+
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
1048
|
+
# release the write lock if not already released
|
|
1049
|
+
if self.writeLock.locked():
|
|
1050
|
+
try:
|
|
1051
|
+
self.writeLock.release() # Ensure the thread lock is always released
|
|
1052
|
+
except Exception as e:
|
|
1053
|
+
self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
|
|
1054
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
902
1055
|
|
|
903
1056
|
|
|
904
1057
|
def __main__():
|
|
905
1058
|
import argparse
|
|
906
|
-
parser = argparse.ArgumentParser(description='TSVZed: A TSV file manager')
|
|
907
|
-
parser.add_argument('filename', type=str, help='The
|
|
1059
|
+
parser = argparse.ArgumentParser(description='TSVZed: A TSV / CSV / NSV file manager')
|
|
1060
|
+
parser.add_argument('filename', type=str, help='The file to read')
|
|
908
1061
|
parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
|
|
909
|
-
parser.add_argument('line', type=str, nargs='*', help='The line to append to the
|
|
910
|
-
parser.add_argument('-
|
|
1062
|
+
parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
|
|
1063
|
+
parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
|
|
1064
|
+
parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
|
|
911
1065
|
parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
|
|
912
1066
|
parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
|
|
913
1067
|
parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
|
|
914
1068
|
args = parser.parse_args()
|
|
915
|
-
|
|
916
|
-
header
|
|
1069
|
+
args.delimiter = get_delimiter(delimiter=args.delimiter,file_name=args.filename)
|
|
1070
|
+
if args.header and args.header.endswith('\\'):
|
|
1071
|
+
args.header += '\\'
|
|
1072
|
+
try:
|
|
1073
|
+
header = args.header.encode().decode('unicode_escape') if args.header else ''
|
|
1074
|
+
except Exception as e:
|
|
1075
|
+
print(f"Failed to decode header: {args.header}")
|
|
1076
|
+
header = ''
|
|
917
1077
|
|
|
918
1078
|
if args.operation == 'read':
|
|
919
1079
|
# check if the file exist
|
|
@@ -921,14 +1081,14 @@ def __main__():
|
|
|
921
1081
|
print(f"File not found: {args.filename}")
|
|
922
1082
|
return
|
|
923
1083
|
# read the file
|
|
924
|
-
data =
|
|
925
|
-
print(pretty_format_table(data.values()))
|
|
1084
|
+
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= not args.force, delimiter=args.delimiter)
|
|
1085
|
+
print(pretty_format_table(data.values(),delimiter=args.delimiter))
|
|
926
1086
|
elif args.operation == 'append':
|
|
927
|
-
|
|
1087
|
+
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
|
|
928
1088
|
elif args.operation == 'delete':
|
|
929
|
-
|
|
1089
|
+
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
|
|
930
1090
|
elif args.operation == 'clear':
|
|
931
|
-
|
|
1091
|
+
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=not args.force, delimiter=args.delimiter)
|
|
932
1092
|
else:
|
|
933
1093
|
print("Invalid operation")
|
|
934
1094
|
return
|