TSVZ 3.2__tar.gz → 3.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsvz-3.2 → tsvz-3.11}/PKG-INFO +1 -1
- {tsvz-3.2 → tsvz-3.11}/TSVZ.egg-info/PKG-INFO +1 -1
- {tsvz-3.2 → tsvz-3.11}/TSVZ.py +294 -58
- {tsvz-3.2 → tsvz-3.11}/README.md +0 -0
- {tsvz-3.2 → tsvz-3.11}/TSVZ.egg-info/SOURCES.txt +0 -0
- {tsvz-3.2 → tsvz-3.11}/TSVZ.egg-info/dependency_links.txt +0 -0
- {tsvz-3.2 → tsvz-3.11}/TSVZ.egg-info/entry_points.txt +0 -0
- {tsvz-3.2 → tsvz-3.11}/TSVZ.egg-info/top_level.txt +0 -0
- {tsvz-3.2 → tsvz-3.11}/setup.cfg +0 -0
- {tsvz-3.2 → tsvz-3.11}/setup.py +0 -0
{tsvz-3.2 → tsvz-3.11}/PKG-INFO
RENAMED
{tsvz-3.2 → tsvz-3.11}/TSVZ.py
RENAMED
|
@@ -6,15 +6,22 @@ import atexit
|
|
|
6
6
|
import threading
|
|
7
7
|
import re
|
|
8
8
|
|
|
9
|
+
RESOURCE_LIB_AVAILABLE = True
|
|
10
|
+
try:
|
|
11
|
+
import resource
|
|
12
|
+
except:
|
|
13
|
+
RESOURCE_LIB_AVAILABLE = False
|
|
14
|
+
|
|
9
15
|
if os.name == 'nt':
|
|
10
16
|
import msvcrt
|
|
11
17
|
elif os.name == 'posix':
|
|
12
18
|
import fcntl
|
|
13
19
|
|
|
14
|
-
version = '3.
|
|
20
|
+
version = '3.11'
|
|
15
21
|
author = 'pan@zopyr.us'
|
|
16
22
|
|
|
17
23
|
DEFAULT_DELIMITER = '\t'
|
|
24
|
+
DEFAULTS_INDICATOR_KEY = '#_defaults_#'
|
|
18
25
|
|
|
19
26
|
def get_delimiter(delimiter,file_name = ''):
|
|
20
27
|
if not delimiter:
|
|
@@ -89,6 +96,125 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER):
|
|
|
89
96
|
outTable.append(row_format.format(*row))
|
|
90
97
|
return '\n'.join(outTable) + '\n'
|
|
91
98
|
|
|
99
|
+
def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_format='.2f'):
|
|
100
|
+
"""
|
|
101
|
+
Format the size in bytes to a human-readable format or vice versa.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
size (int or str): The size in bytes or a string representation of the size.
|
|
105
|
+
use_1024_bytes (bool, optional): Whether to use 1024 bytes as the base for conversion. If None, it will be determined automatically. Default is None.
|
|
106
|
+
to_int (bool, optional): Whether to convert the size to an integer. Default is False.
|
|
107
|
+
to_str (bool, optional): Whether to convert the size to a string representation. Default is False.
|
|
108
|
+
str_format (str, optional): The format string to use when converting the size to a string. Default is '.2f'.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
int or str: The formatted size based on the provided arguments.
|
|
112
|
+
|
|
113
|
+
Examples:
|
|
114
|
+
>>> format_bytes(1500)
|
|
115
|
+
'1.50 KB'
|
|
116
|
+
>>> format_bytes('1.5 GiB', to_int=True)
|
|
117
|
+
1610612736
|
|
118
|
+
"""
|
|
119
|
+
if to_int or isinstance(size, str):
|
|
120
|
+
if isinstance(size, int):
|
|
121
|
+
return size
|
|
122
|
+
elif isinstance(size, str):
|
|
123
|
+
# Use regular expression to split the numeric part from the unit, handling optional whitespace
|
|
124
|
+
match = re.match(r"(\d+(\.\d+)?)\s*([a-zA-Z]*)", size)
|
|
125
|
+
if not match:
|
|
126
|
+
print("Invalid size format. Expected format: 'number [unit]', e.g., '1.5 GiB' or '1.5GiB'")
|
|
127
|
+
print(f"Got: {size}")
|
|
128
|
+
return 0
|
|
129
|
+
number, _, unit = match.groups()
|
|
130
|
+
number = float(number)
|
|
131
|
+
unit = unit.strip().lower().rstrip('b')
|
|
132
|
+
# Define the unit conversion dictionary
|
|
133
|
+
if unit.endswith('i'):
|
|
134
|
+
# this means we treat the unit as 1024 bytes if it ends with 'i'
|
|
135
|
+
use_1024_bytes = True
|
|
136
|
+
elif use_1024_bytes is None:
|
|
137
|
+
use_1024_bytes = False
|
|
138
|
+
unit = unit.rstrip('i')
|
|
139
|
+
if use_1024_bytes:
|
|
140
|
+
power = 2**10
|
|
141
|
+
else:
|
|
142
|
+
power = 10**3
|
|
143
|
+
unit_labels = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
|
|
144
|
+
if unit not in unit_labels:
|
|
145
|
+
print(f"Invalid unit '{unit}'. Expected one of {list(unit_labels.keys())}")
|
|
146
|
+
return 0
|
|
147
|
+
# Calculate the bytes
|
|
148
|
+
return int(number * (power ** unit_labels[unit]))
|
|
149
|
+
else:
|
|
150
|
+
try:
|
|
151
|
+
return int(size)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
return 0
|
|
154
|
+
elif to_str or isinstance(size, int) or isinstance(size, float):
|
|
155
|
+
if isinstance(size, str):
|
|
156
|
+
try:
|
|
157
|
+
size = size.lower().strip().rstrip('b')
|
|
158
|
+
size = float(size)
|
|
159
|
+
except Exception as e:
|
|
160
|
+
return size
|
|
161
|
+
# size is in bytes
|
|
162
|
+
if use_1024_bytes or use_1024_bytes is None:
|
|
163
|
+
power = 2**10
|
|
164
|
+
n = 0
|
|
165
|
+
power_labels = {0 : '', 1: 'Ki', 2: 'Mi', 3: 'Gi', 4: 'Ti', 5: 'Pi'}
|
|
166
|
+
while size > power:
|
|
167
|
+
size /= power
|
|
168
|
+
n += 1
|
|
169
|
+
return f"{size:{str_format}} {power_labels[n]}"
|
|
170
|
+
else:
|
|
171
|
+
power = 10**3
|
|
172
|
+
n = 0
|
|
173
|
+
power_labels = {0 : '', 1: 'K', 2: 'M', 3: 'G', 4: 'T', 5: 'P'}
|
|
174
|
+
while size > power:
|
|
175
|
+
size /= power
|
|
176
|
+
n += 1
|
|
177
|
+
return f"{size:{str_format}} {power_labels[n]}"
|
|
178
|
+
else:
|
|
179
|
+
try:
|
|
180
|
+
return format_bytes(float(size), use_1024_bytes)
|
|
181
|
+
except Exception as e:
|
|
182
|
+
import traceback
|
|
183
|
+
print(f"Error: {e}")
|
|
184
|
+
print(traceback.format_exc())
|
|
185
|
+
print(f"Invalid size: {size}")
|
|
186
|
+
return 0
|
|
187
|
+
|
|
188
|
+
def get_resource_usage(return_dict = False):
|
|
189
|
+
try:
|
|
190
|
+
if RESOURCE_LIB_AVAILABLE:
|
|
191
|
+
rawResource = resource.getrusage(resource.RUSAGE_SELF)
|
|
192
|
+
resourceDict = {}
|
|
193
|
+
resourceDict['user mode time'] = f'{rawResource.ru_utime} seconds'
|
|
194
|
+
resourceDict['system mode time'] = f'{rawResource.ru_stime} seconds'
|
|
195
|
+
resourceDict['max resident set size'] = f'{format_bytes(rawResource.ru_maxrss * 1024)}B'
|
|
196
|
+
resourceDict['shared memory size'] = f'{format_bytes(rawResource.ru_ixrss * 1024)}B'
|
|
197
|
+
resourceDict['unshared memory size'] = f'{format_bytes(rawResource.ru_idrss * 1024)}B'
|
|
198
|
+
resourceDict['unshared stack size'] = f'{format_bytes(rawResource.ru_isrss * 1024)}B'
|
|
199
|
+
resourceDict['cached page hits'] = f'{rawResource.ru_minflt}'
|
|
200
|
+
resourceDict['missed page hits'] = f'{rawResource.ru_majflt}'
|
|
201
|
+
resourceDict['swapped out page count'] = f'{rawResource.ru_nswap}'
|
|
202
|
+
resourceDict['block input operations'] = f'{rawResource.ru_inblock}'
|
|
203
|
+
resourceDict['block output operations'] = f'{rawResource.ru_oublock}'
|
|
204
|
+
resourceDict['IPC messages sent'] = f'{rawResource.ru_msgsnd}'
|
|
205
|
+
resourceDict['IPC messages received'] = f'{rawResource.ru_msgrcv}'
|
|
206
|
+
resourceDict['signals received'] = f'{rawResource.ru_nsignals}'
|
|
207
|
+
resourceDict['voluntary context sw'] = f'{rawResource.ru_nvcsw}'
|
|
208
|
+
resourceDict['involuntary context sw'] = f'{rawResource.ru_nivcsw}'
|
|
209
|
+
if return_dict:
|
|
210
|
+
return resourceDict
|
|
211
|
+
return '\n'.join(['\t'.join(line) for line in resourceDict.items()])
|
|
212
|
+
except Exception as e:
|
|
213
|
+
print(f"Error: {e}")
|
|
214
|
+
if return_dict:
|
|
215
|
+
return {}
|
|
216
|
+
return ''
|
|
217
|
+
|
|
92
218
|
def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
93
219
|
"""
|
|
94
220
|
Prints the given message or logs it using the provided teeLogger.
|
|
@@ -109,7 +235,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
109
235
|
except Exception as e:
|
|
110
236
|
print(message,flush=True)
|
|
111
237
|
|
|
112
|
-
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
|
|
238
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = []):
|
|
113
239
|
"""
|
|
114
240
|
Process a line of text and update the task dictionary.
|
|
115
241
|
|
|
@@ -120,6 +246,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
120
246
|
verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
121
247
|
teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
|
|
122
248
|
strict (bool, optional): Whether to strictly enforce the correct number of columns. Defaults to True.
|
|
249
|
+
defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
123
250
|
|
|
124
251
|
Returns:
|
|
125
252
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
@@ -131,36 +258,40 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
131
258
|
if verbose:
|
|
132
259
|
__teePrintOrNot(f"Ignoring empty line: {line}",teeLogger=teeLogger)
|
|
133
260
|
return correctColumnNum , []
|
|
134
|
-
if line.startswith('#'):
|
|
261
|
+
if line.startswith('#') and not line.startswith(DEFAULTS_INDICATOR_KEY):
|
|
135
262
|
if verbose:
|
|
136
263
|
__teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
|
|
137
264
|
return correctColumnNum , []
|
|
138
265
|
# we only interested in the lines that have the correct number of columns
|
|
139
|
-
lineCache = [segment.
|
|
266
|
+
lineCache = [segment.rstrip() for segment in line.split(delimiter)]
|
|
140
267
|
if not lineCache:
|
|
141
268
|
return correctColumnNum , []
|
|
142
269
|
if correctColumnNum == -1:
|
|
270
|
+
if defaults and len(defaults) > 1:
|
|
271
|
+
correctColumnNum = len(defaults)
|
|
272
|
+
else:
|
|
273
|
+
correctColumnNum = len(lineCache)
|
|
143
274
|
if verbose:
|
|
144
275
|
__teePrintOrNot(f"detected correctColumnNum: {len(lineCache)}",teeLogger=teeLogger)
|
|
145
|
-
correctColumnNum = len(lineCache)
|
|
146
276
|
if not lineCache[0]:
|
|
147
277
|
if verbose:
|
|
148
278
|
__teePrintOrNot(f"Ignoring line with empty key: {line}",teeLogger=teeLogger)
|
|
149
279
|
return correctColumnNum , []
|
|
150
280
|
if len(lineCache) == 1 or not any(lineCache[1:]):
|
|
151
|
-
if correctColumnNum == 1:
|
|
281
|
+
if correctColumnNum == 1:
|
|
282
|
+
taskDic[lineCache[0]] = lineCache
|
|
283
|
+
elif lineCache[0] == DEFAULTS_INDICATOR_KEY:
|
|
284
|
+
if verbose:
|
|
285
|
+
__teePrintOrNot(f"Empty defaults line found: {line}",teeLogger=teeLogger)
|
|
286
|
+
defaults = []
|
|
152
287
|
else:
|
|
153
288
|
if verbose:
|
|
154
289
|
__teePrintOrNot(f"Key {lineCache[0]} found with empty value, deleting such key's representaion",teeLogger=teeLogger)
|
|
155
290
|
if lineCache[0] in taskDic:
|
|
156
291
|
del taskDic[lineCache[0]]
|
|
157
292
|
return correctColumnNum , []
|
|
158
|
-
elif len(lineCache)
|
|
159
|
-
|
|
160
|
-
if verbose:
|
|
161
|
-
__teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
|
|
162
|
-
else:
|
|
163
|
-
if strict:
|
|
293
|
+
elif len(lineCache) != correctColumnNum:
|
|
294
|
+
if strict and not any(defaults):
|
|
164
295
|
if verbose:
|
|
165
296
|
__teePrintOrNot(f"Ignoring line with {len(lineCache)} columns: {line}",teeLogger=teeLogger)
|
|
166
297
|
return correctColumnNum , []
|
|
@@ -170,12 +301,26 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
170
301
|
lineCache += ['']*(correctColumnNum-len(lineCache))
|
|
171
302
|
elif len(lineCache) > correctColumnNum:
|
|
172
303
|
lineCache = lineCache[:correctColumnNum]
|
|
173
|
-
taskDic[lineCache[0]] = lineCache
|
|
174
304
|
if verbose:
|
|
175
|
-
__teePrintOrNot(f"
|
|
305
|
+
__teePrintOrNot(f"Correcting {lineCache[0]}",teeLogger=teeLogger)
|
|
306
|
+
# now replace empty values with defaults
|
|
307
|
+
if defaults and len(defaults) > 1:
|
|
308
|
+
for i in range(1,len(lineCache)):
|
|
309
|
+
if not lineCache[i] and i < len(defaults) and defaults[i]:
|
|
310
|
+
lineCache[i] = defaults[i]
|
|
311
|
+
if verbose:
|
|
312
|
+
__teePrintOrNot(f"Replacing empty value at {i} with default: {defaults[i]}",teeLogger=teeLogger)
|
|
313
|
+
if lineCache[0] == DEFAULTS_INDICATOR_KEY:
|
|
314
|
+
if verbose:
|
|
315
|
+
__teePrintOrNot(f"Defaults line found: {line}",teeLogger=teeLogger)
|
|
316
|
+
defaults = lineCache
|
|
317
|
+
return correctColumnNum , []
|
|
318
|
+
taskDic[lineCache[0]] = lineCache
|
|
319
|
+
if verbose:
|
|
320
|
+
__teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
|
|
176
321
|
return correctColumnNum, lineCache
|
|
177
322
|
|
|
178
|
-
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter =
|
|
323
|
+
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = []):
|
|
179
324
|
"""
|
|
180
325
|
Reads the last valid line from a file.
|
|
181
326
|
|
|
@@ -187,6 +332,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
187
332
|
teeLogger (optional): Logger to use for tee print. Defaults to None.
|
|
188
333
|
encoding (str, optional): The encoding of the file. Defaults to None.
|
|
189
334
|
strict (bool, optional): Whether to enforce strict processing. Defaults to False.
|
|
335
|
+
delimiter (str, optional): The delimiter used in the file. Defaults to None.
|
|
336
|
+
defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
190
337
|
|
|
191
338
|
Returns:
|
|
192
339
|
list: The last valid line data processed by processLine, or an empty list if none found.
|
|
@@ -220,13 +367,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
220
367
|
if lines[i].strip(): # Skip empty lines
|
|
221
368
|
# Process the line
|
|
222
369
|
correctColumnNum, lineCache = _processLine(
|
|
223
|
-
lines[i].decode(encoding=encoding),
|
|
224
|
-
taskDic,
|
|
225
|
-
correctColumnNum,
|
|
370
|
+
line=lines[i].decode(encoding=encoding),
|
|
371
|
+
taskDic=taskDic,
|
|
372
|
+
correctColumnNum=correctColumnNum,
|
|
226
373
|
verbose=verbose,
|
|
227
374
|
teeLogger=teeLogger,
|
|
228
375
|
strict=strict,
|
|
229
|
-
delimiter=delimiter
|
|
376
|
+
delimiter=delimiter,
|
|
377
|
+
defaults=defaults,
|
|
230
378
|
)
|
|
231
379
|
# If the line is valid, return it
|
|
232
380
|
if lineCache and any(lineCache):
|
|
@@ -327,7 +475,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
327
475
|
return False
|
|
328
476
|
return True
|
|
329
477
|
|
|
330
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
|
|
478
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = []):
|
|
331
479
|
"""
|
|
332
480
|
Compatibility method, calls readTabularFile.
|
|
333
481
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
@@ -344,6 +492,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
344
492
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
345
493
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
346
494
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
|
|
495
|
+
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
347
496
|
|
|
348
497
|
Returns:
|
|
349
498
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -352,9 +501,9 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
352
501
|
- Exception: If the file is not found or there is a data format error.
|
|
353
502
|
|
|
354
503
|
"""
|
|
355
|
-
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter)
|
|
504
|
+
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
|
|
356
505
|
|
|
357
|
-
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter =
|
|
506
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = []):
|
|
358
507
|
"""
|
|
359
508
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
360
509
|
|
|
@@ -370,6 +519,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
370
519
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
371
520
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
372
521
|
- delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
|
|
522
|
+
- defaults (list, optional): The default values to use for missing columns. Defaults to [].
|
|
373
523
|
|
|
374
524
|
Returns:
|
|
375
525
|
- OrderedDict: The dictionary containing the data from the Tabular file.
|
|
@@ -394,12 +544,12 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
394
544
|
if verbose:
|
|
395
545
|
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
396
546
|
if lastLineOnly:
|
|
397
|
-
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
|
|
547
|
+
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter, defaults=defaults)
|
|
398
548
|
if lineCache:
|
|
399
549
|
taskDic[lineCache[0]] = lineCache
|
|
400
550
|
return lineCache
|
|
401
551
|
for line in file:
|
|
402
|
-
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
|
|
552
|
+
correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
|
|
403
553
|
return taskDic
|
|
404
554
|
|
|
405
555
|
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
|
|
@@ -446,7 +596,7 @@ def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createI
|
|
|
446
596
|
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
447
597
|
return
|
|
448
598
|
if type(lineToAppend) == str:
|
|
449
|
-
lineToAppend = lineToAppend.
|
|
599
|
+
lineToAppend = lineToAppend.split(delimiter)
|
|
450
600
|
else:
|
|
451
601
|
for i in range(len(lineToAppend)):
|
|
452
602
|
if type(lineToAppend[i]) != str:
|
|
@@ -548,14 +698,19 @@ class TSVZed(OrderedDict):
|
|
|
548
698
|
except Exception as e:
|
|
549
699
|
print(message,flush=True)
|
|
550
700
|
|
|
551
|
-
def
|
|
701
|
+
def getResourseUsage(self,return_dict = False):
|
|
702
|
+
return get_resource_usage(return_dict = return_dict)
|
|
703
|
+
|
|
704
|
+
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = [],strict = False):
|
|
552
705
|
super().__init__()
|
|
553
706
|
self.version = version
|
|
707
|
+
self.strict = strict
|
|
554
708
|
self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
|
|
555
709
|
self.lastUpdateTime = self.externalFileUpdateTime
|
|
556
710
|
self._fileName = fileName
|
|
557
711
|
self.teeLogger = teeLogger
|
|
558
712
|
self.delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
713
|
+
self.defaults = defualts
|
|
559
714
|
self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
|
|
560
715
|
self.correctColumnNum = -1
|
|
561
716
|
self.createIfNotExist = createIfNotExist
|
|
@@ -584,6 +739,27 @@ class TSVZed(OrderedDict):
|
|
|
584
739
|
self.load()
|
|
585
740
|
atexit.register(self.stopAppendThread)
|
|
586
741
|
|
|
742
|
+
def setDefaults(self,defaults):
|
|
743
|
+
if not defaults:
|
|
744
|
+
defaults = []
|
|
745
|
+
return
|
|
746
|
+
if isinstance(defaults,str):
|
|
747
|
+
defaults = defaults.split(self.delimiter)
|
|
748
|
+
elif not isinstance(defaults,list):
|
|
749
|
+
try:
|
|
750
|
+
defaults = list(defaults)
|
|
751
|
+
except:
|
|
752
|
+
if self.verbose:
|
|
753
|
+
self.__teePrintOrNot('Invalid defaults, setting defaults to empty.','error')
|
|
754
|
+
defaults = []
|
|
755
|
+
return
|
|
756
|
+
if not any(defaults):
|
|
757
|
+
defaults = []
|
|
758
|
+
return
|
|
759
|
+
if defaults[0] != DEFAULTS_INDICATOR_KEY:
|
|
760
|
+
defaults = [DEFAULTS_INDICATOR_KEY]+defaults
|
|
761
|
+
self.defaults = defaults
|
|
762
|
+
|
|
587
763
|
def load(self):
|
|
588
764
|
self.reload()
|
|
589
765
|
if self.rewrite_on_load:
|
|
@@ -597,7 +773,7 @@ class TSVZed(OrderedDict):
|
|
|
597
773
|
if self.verbose:
|
|
598
774
|
self.__teePrintOrNot(f"Loading {self._fileName}")
|
|
599
775
|
super().clear()
|
|
600
|
-
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict =
|
|
776
|
+
readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
|
|
601
777
|
if self.verbose:
|
|
602
778
|
self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
|
|
603
779
|
self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
|
|
@@ -612,30 +788,55 @@ class TSVZed(OrderedDict):
|
|
|
612
788
|
return self
|
|
613
789
|
|
|
614
790
|
def __setitem__(self,key,value):
|
|
615
|
-
key = str(key).
|
|
791
|
+
key = str(key).rstrip()
|
|
616
792
|
if not key:
|
|
617
793
|
self.__teePrintOrNot('Key cannot be empty','error')
|
|
618
794
|
return
|
|
619
795
|
if type(value) == str:
|
|
620
|
-
value = value.
|
|
796
|
+
value = value.split(self.delimiter)
|
|
621
797
|
# sanitize the value
|
|
622
|
-
value = [(str(segment).
|
|
623
|
-
#
|
|
798
|
+
value = [(str(segment).rstrip() if type(segment) != str else segment.rstrip()) if segment else '' for segment in value]
|
|
799
|
+
# escape the delimiter and newline characters
|
|
800
|
+
value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
|
|
624
801
|
# the first field in value should be the key
|
|
625
802
|
# add it if it is not there
|
|
626
803
|
if not value or value[0] != key:
|
|
627
804
|
value = [key]+value
|
|
628
805
|
# verify the value has the correct number of columns
|
|
629
806
|
if self.correctColumnNum != 1 and len(value) == 1:
|
|
630
|
-
# this means we want to clear /
|
|
807
|
+
# this means we want to clear / delete the key
|
|
631
808
|
self.__delitem__(key)
|
|
632
809
|
elif self.correctColumnNum > 0:
|
|
633
|
-
|
|
810
|
+
if len(value) != self.correctColumnNum:
|
|
811
|
+
if self.strict:
|
|
812
|
+
self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}. Refuse adding key...",'error')
|
|
813
|
+
return
|
|
814
|
+
elif self.verbose:
|
|
815
|
+
self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}, correcting...",'warning')
|
|
816
|
+
if len(value) < self.correctColumnNum:
|
|
817
|
+
value += ['']*(self.correctColumnNum-len(value))
|
|
818
|
+
elif len(value) > self.correctColumnNum:
|
|
819
|
+
value = value[:self.correctColumnNum]
|
|
634
820
|
else:
|
|
635
821
|
self.correctColumnNum = len(value)
|
|
822
|
+
if self.defaults and len(self.defaults) > 1:
|
|
823
|
+
for i in range(1,len(value)):
|
|
824
|
+
if not value[i] and i < len(self.defaults) and self.defaults[i]:
|
|
825
|
+
value[i] = self.defaults[i]
|
|
826
|
+
if self.verbose:
|
|
827
|
+
self.__teePrintOrNot(f" Replacing empty value at {i} with default: {self.defaults[i]}")
|
|
828
|
+
if key == DEFAULTS_INDICATOR_KEY:
|
|
829
|
+
self.defaults = value
|
|
830
|
+
if self.verbose:
|
|
831
|
+
self.__teePrintOrNot(f"Defaults set to {value}")
|
|
832
|
+
if not self.memoryOnly:
|
|
833
|
+
self.appendQueue.append(self.delimiter.join(value))
|
|
834
|
+
self.lastUpdateTime = get_time_ns()
|
|
835
|
+
if self.verbose:
|
|
836
|
+
self.__teePrintOrNot(f"Appending Defaults {key} to the appendQueue")
|
|
837
|
+
return
|
|
636
838
|
if self.verbose:
|
|
637
839
|
self.__teePrintOrNot(f"Setting {key} to {value}")
|
|
638
|
-
|
|
639
840
|
if key in self:
|
|
640
841
|
if self[key] == value:
|
|
641
842
|
if self.verbose:
|
|
@@ -644,9 +845,13 @@ class TSVZed(OrderedDict):
|
|
|
644
845
|
self.dirty = True
|
|
645
846
|
# update the dictionary,
|
|
646
847
|
super().__setitem__(key,value)
|
|
647
|
-
if self.verbose:
|
|
648
|
-
self.__teePrintOrNot(f"Key {key} updated")
|
|
649
848
|
if self.memoryOnly:
|
|
849
|
+
if self.verbose:
|
|
850
|
+
self.__teePrintOrNot(f"Key {key} updated in memory only")
|
|
851
|
+
return
|
|
852
|
+
elif key.startswith('#'):
|
|
853
|
+
if self.verbose:
|
|
854
|
+
self.__teePrintOrNot(f"Key {key} updated in memory only as it starts with #")
|
|
650
855
|
return
|
|
651
856
|
if self.verbose:
|
|
652
857
|
self.__teePrintOrNot(f"Appending {key} to the appendQueue")
|
|
@@ -659,16 +864,29 @@ class TSVZed(OrderedDict):
|
|
|
659
864
|
|
|
660
865
|
|
|
661
866
|
def __delitem__(self,key):
|
|
662
|
-
key = str(key).
|
|
867
|
+
key = str(key).rstrip()
|
|
868
|
+
if key == DEFAULTS_INDICATOR_KEY:
|
|
869
|
+
self.defaults = []
|
|
870
|
+
if self.verbose:
|
|
871
|
+
self.__teePrintOrNot(f"Defaults cleared")
|
|
872
|
+
if not self.memoryOnly:
|
|
873
|
+
self.__appendEmptyLine(key)
|
|
874
|
+
if self.verbose:
|
|
875
|
+
self.__teePrintOrNot(f"Appending empty default line {key}")
|
|
876
|
+
return
|
|
663
877
|
# delete the key from the dictionary and update the file
|
|
664
878
|
if key not in self:
|
|
665
879
|
if self.verbose:
|
|
666
880
|
self.__teePrintOrNot(f"Key {key} not found")
|
|
667
881
|
return
|
|
668
882
|
super().__delitem__(key)
|
|
669
|
-
if self.memoryOnly:
|
|
883
|
+
if self.memoryOnly or key.startswith('#'):
|
|
884
|
+
if self.verbose:
|
|
885
|
+
self.__teePrintOrNot(f"Key {key} deleted in memory")
|
|
670
886
|
return
|
|
671
887
|
self.__appendEmptyLine(key)
|
|
888
|
+
if self.verbose:
|
|
889
|
+
self.__teePrintOrNot(f"Appending empty line {key}")
|
|
672
890
|
self.lastUpdateTime = get_time_ns()
|
|
673
891
|
|
|
674
892
|
def __appendEmptyLine(self,key):
|
|
@@ -868,30 +1086,35 @@ memoryOnly:{self.memoryOnly}
|
|
|
868
1086
|
return self
|
|
869
1087
|
|
|
870
1088
|
def mapToFile(self):
|
|
1089
|
+
mec = self.monitor_external_changes
|
|
1090
|
+
self.monitor_external_changes = False
|
|
871
1091
|
try:
|
|
872
1092
|
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
873
1093
|
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
874
1094
|
file = self.get_file_obj('r+b')
|
|
875
1095
|
overWrite = False
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
1096
|
+
if self.header:
|
|
1097
|
+
line = file.readline().decode(self.encoding)
|
|
1098
|
+
aftPos = file.tell()
|
|
1099
|
+
if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
|
|
1100
|
+
file.seek(0)
|
|
1101
|
+
file.write(f'{self.header}\n'.encode(encoding=self.encoding))
|
|
1102
|
+
# if the header is not the same length as the line, we need to overwrite the file
|
|
1103
|
+
if aftPos != file.tell():
|
|
1104
|
+
overWrite = True
|
|
1105
|
+
if self.verbose:
|
|
1106
|
+
self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
|
|
886
1107
|
for value in self.values():
|
|
887
|
-
|
|
1108
|
+
if value[0].startswith('#'):
|
|
1109
|
+
continue
|
|
1110
|
+
strToWrite = self.delimiter.join(value)
|
|
888
1111
|
if overWrite:
|
|
889
1112
|
if self.verbose:
|
|
890
1113
|
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
891
|
-
file.write(strToWrite.encode(encoding=self.encoding))
|
|
1114
|
+
file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
|
|
892
1115
|
continue
|
|
893
1116
|
pos = file.tell()
|
|
894
|
-
line = file.readline()
|
|
1117
|
+
line = file.readline()
|
|
895
1118
|
aftPos = file.tell()
|
|
896
1119
|
if not line or pos == aftPos:
|
|
897
1120
|
if self.verbose:
|
|
@@ -899,13 +1122,14 @@ memoryOnly:{self.memoryOnly}
|
|
|
899
1122
|
file.write(strToWrite.encode(encoding=self.encoding))
|
|
900
1123
|
overWrite = True
|
|
901
1124
|
continue
|
|
1125
|
+
strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
|
|
902
1126
|
if line != strToWrite:
|
|
903
1127
|
if self.verbose:
|
|
904
|
-
self.__teePrintOrNot(f"
|
|
1128
|
+
self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
|
|
905
1129
|
file.seek(pos)
|
|
906
1130
|
# fill the string with space to write to the correct length
|
|
907
1131
|
#file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
908
|
-
file.write(strToWrite
|
|
1132
|
+
file.write(strToWrite)
|
|
909
1133
|
if aftPos != file.tell():
|
|
910
1134
|
overWrite = True
|
|
911
1135
|
file.truncate()
|
|
@@ -921,6 +1145,8 @@ memoryOnly:{self.memoryOnly}
|
|
|
921
1145
|
import traceback
|
|
922
1146
|
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
923
1147
|
self.deSynced = True
|
|
1148
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
1149
|
+
self.monitor_external_changes = mec
|
|
924
1150
|
return self
|
|
925
1151
|
|
|
926
1152
|
def checkExternalChanges(self):
|
|
@@ -1062,7 +1288,10 @@ def __main__():
|
|
|
1062
1288
|
parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
|
|
1063
1289
|
parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
|
|
1064
1290
|
parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
|
|
1065
|
-
parser.add_argument('
|
|
1291
|
+
parser.add_argument('--defaults', type=str, help='Default values to fill in the missing columns. seperate using --delimiter. Ex. if -d = comma, --defaults="key,value1,value2..." Note: Please specify the key. But it will not be used as a key need to be unique in data.')
|
|
1292
|
+
strictMode = parser.add_mutually_exclusive_group()
|
|
1293
|
+
strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
|
|
1294
|
+
strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
|
|
1066
1295
|
parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
|
|
1067
1296
|
parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
|
|
1068
1297
|
args = parser.parse_args()
|
|
@@ -1074,6 +1303,13 @@ def __main__():
|
|
|
1074
1303
|
except Exception as e:
|
|
1075
1304
|
print(f"Failed to decode header: {args.header}")
|
|
1076
1305
|
header = ''
|
|
1306
|
+
defaults = []
|
|
1307
|
+
if args.defaults:
|
|
1308
|
+
try:
|
|
1309
|
+
defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
|
|
1310
|
+
except Exception as e:
|
|
1311
|
+
print(f"Failed to decode defaults: {args.defaults}")
|
|
1312
|
+
defaults = []
|
|
1077
1313
|
|
|
1078
1314
|
if args.operation == 'read':
|
|
1079
1315
|
# check if the file exist
|
|
@@ -1081,14 +1317,14 @@ def __main__():
|
|
|
1081
1317
|
print(f"File not found: {args.filename}")
|
|
1082
1318
|
return
|
|
1083
1319
|
# read the file
|
|
1084
|
-
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict=
|
|
1320
|
+
data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
|
|
1085
1321
|
print(pretty_format_table(data.values(),delimiter=args.delimiter))
|
|
1086
1322
|
elif args.operation == 'append':
|
|
1087
|
-
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict=
|
|
1323
|
+
appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
|
|
1088
1324
|
elif args.operation == 'delete':
|
|
1089
|
-
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict=
|
|
1325
|
+
appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
|
|
1090
1326
|
elif args.operation == 'clear':
|
|
1091
|
-
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=
|
|
1327
|
+
clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
|
|
1092
1328
|
else:
|
|
1093
1329
|
print("Invalid operation")
|
|
1094
1330
|
return
|
{tsvz-3.2 → tsvz-3.11}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tsvz-3.2 → tsvz-3.11}/setup.cfg
RENAMED
|
File without changes
|
{tsvz-3.2 → tsvz-3.11}/setup.py
RENAMED
|
File without changes
|