TSVZ 2.57__py3-none-any.whl → 2.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TSVZ-2.65.dist-info/METADATA +48 -0
- TSVZ-2.65.dist-info/RECORD +7 -0
- {TSVZ-2.57.dist-info → TSVZ-2.65.dist-info}/WHEEL +1 -1
- TSVZ-2.65.dist-info/entry_points.txt +3 -0
- TSVZ.py +347 -91
- TSVZ-2.57.dist-info/METADATA +0 -17
- TSVZ-2.57.dist-info/RECORD +0 -6
- {TSVZ-2.57.dist-info → TSVZ-2.65.dist-info}/LICENSE +0 -0
- {TSVZ-2.57.dist-info → TSVZ-2.65.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: TSVZ
|
|
3
|
+
Version: 2.65
|
|
4
|
+
Summary: A simple in-memory wrapper around a TSV file to function as a database
|
|
5
|
+
Home-page: https://github.com/yufei-pan/TSVZ
|
|
6
|
+
Author: Yufei Pan
|
|
7
|
+
Author-email: pan@zopyr.us
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
This lib provides some helper functions to interact with TSV (tab-separated values) files.
|
|
16
|
+
|
|
17
|
+
TSVZ can also function like an in-memory DB that is able to perform non-blocking read / write to TSV files.
|
|
18
|
+
|
|
19
|
+
Import as a lib or use console tool:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
tsvz -h
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
TSVZ -h
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
usage: TSVZ [-h] [-c HEADER] [-f] [-v] [-V] filename [{read,append,delete,clear}] [line ...]
|
|
31
|
+
|
|
32
|
+
TSVZed: A TSV file manager
|
|
33
|
+
|
|
34
|
+
positional arguments:
|
|
35
|
+
filename The TSV file to read
|
|
36
|
+
{read,append,delete,clear}
|
|
37
|
+
The operation to perform. Default: read
|
|
38
|
+
line The line to append to the TSV file. it follows as : \{key\} \{value1\} \{value2\} ... if a key without value be
|
|
39
|
+
inserted, the value will get deleted.
|
|
40
|
+
|
|
41
|
+
options:
|
|
42
|
+
-h, --help show this help message and exit
|
|
43
|
+
-c HEADER, --header HEADER
|
|
44
|
+
Perform checks with this header of the TSV file. seperate using \t
|
|
45
|
+
-f, --force Force the operation. Ignore checks for column numbers / headers
|
|
46
|
+
-v, --verbose Print verbose output
|
|
47
|
+
-V, --version show program's version number and exit
|
|
48
|
+
```
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
TSVZ.py,sha256=tDsAV_zNosNEn5-FifNSgacdr9T-6zsQuZPZFkVakIc,42423
|
|
2
|
+
TSVZ-2.65.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
+
TSVZ-2.65.dist-info/METADATA,sha256=NxGY5k380-pEPwsTqQdUo3F52jiH8QexSbLcGN4Me94,1689
|
|
4
|
+
TSVZ-2.65.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
5
|
+
TSVZ-2.65.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
|
|
6
|
+
TSVZ-2.65.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
|
|
7
|
+
TSVZ-2.65.dist-info/RECORD,,
|
TSVZ.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
import os
|
|
2
|
+
import os , sys
|
|
3
3
|
from collections import OrderedDict , deque
|
|
4
4
|
import time
|
|
5
5
|
import atexit
|
|
@@ -10,7 +10,53 @@ if os.name == 'nt':
|
|
|
10
10
|
elif os.name == 'posix':
|
|
11
11
|
import fcntl
|
|
12
12
|
|
|
13
|
-
version = '2.
|
|
13
|
+
version = '2.65'
|
|
14
|
+
author = 'pan@zopyr.us'
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def pretty_format_table(data):
    """
    Render tabular data as an aligned, pipe-separated text table.

    The first row is treated as the header and is followed by a dashed
    separator line. Every cell is left-aligned and padded to the width of
    the widest cell in its column.

    Parameters:
    - data (str, dict or iterable of rows): The table to format.
        A string is parsed as TSV text (rows separated by newlines, cells by tabs).
        For a dict, its values are used as the rows.
        Any other iterable must yield rows; a row that is itself a string is
        split on tabs, any other row is treated as an iterable of cells.

    Returns:
    str: The formatted table, terminated by a newline, or '' when there is nothing to format.
    """
    if not data:
        return ''
    if isinstance(data, str):
        text = data.strip('\n')
        if not text:
            return ''
        rows = text.split('\n')
    elif isinstance(data, dict):
        rows = list(data.values())
    else:
        rows = list(data)
    # Normalize every row to a list of strings so widths and padding are
    # computed on exactly what will be printed (None, numbers, ...).
    rows = [
        row.split('\t') if isinstance(row, str) else [str(cell) for cell in row]
        for row in rows
    ]
    if not rows:
        return ''
    num_cols = max(len(row) for row in rows)
    if num_cols == 0:
        return ''
    # Pad short rows with empty cells so ragged input cannot raise IndexError.
    rows = [row + [''] * (num_cols - len(row)) for row in rows]
    col_widths = [max(len(cell) for cell in column) for column in zip(*rows)]

    def render(row):
        return ' | '.join(cell.ljust(width) for cell, width in zip(row, col_widths))

    outTable = [render(rows[0]), '-+-'.join('-' * width for width in col_widths)]
    outTable.extend(render(row) for row in rows[1:])
    return '\n'.join(outTable) + '\n'
|
|
40
|
+
|
|
41
|
+
def __teePrintOrNot(message,level = 'info',teeLogger = None):
    """
    Send a message to the tee logger when one is supplied, otherwise print it.

    Parameters:
    message (str): The text to emit.
    level (str, optional): The level handed to teeLogger.teelog. Defaults to 'info'.
    teeLogger (object, optional): An object exposing teelog(message, level). Defaults to None.

    Returns:
    None
    """
    try:
        if not teeLogger:
            print(message,flush=True)
        else:
            teeLogger.teelog(message,level)
    except Exception:
        # Best effort: if the chosen output channel failed, fall back to stdout.
        print(message,flush=True)
|
|
14
60
|
|
|
15
61
|
def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True):
|
|
16
62
|
"""
|
|
@@ -138,6 +184,87 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
138
184
|
# Return empty list if no valid line found
|
|
139
185
|
return last_valid_line
|
|
140
186
|
|
|
187
|
+
def formatHeader(header,verbose = False,teeLogger = None):
    """
    Normalize a header into a single tab-separated string.

    Parameters:
    - header (str or iterable of str): The header string, or the column names to join with tabs.
    - verbose (bool, optional): Whether to report an invalid header. Defaults to False.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.

    Returns:
    str: The header with leading / trailing whitespace stripped, or '' when
        the header could not be joined into a string.
    """
    # isinstance (not an exact type check) so str subclasses are kept intact
    # instead of being tab-joined character by character.
    if not isinstance(header, str):
        try:
            header = '\t'.join(header)
        except Exception:
            # e.g. TypeError: header is not iterable or holds non-string items.
            if verbose:
                __teePrintOrNot('Invalid header, setting header to empty.','error',teeLogger=teeLogger)
            header = ''
    return header.strip()
|
|
213
|
+
|
|
214
|
+
def lineContainHeader(header,line,verbose = False,teeLogger = None,strict = False):
    """
    Check whether a line begins with the expected header.

    The comparison ignores letter case and space characters, and only
    requires the line to start with the header.

    Parameters:
    - header (str): The expected header.
    - line (str): The line to compare against the header.
    - verbose (bool, optional): Whether to print the header and the line. Defaults to False.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
    - strict (bool, optional): Whether to raise on a mismatch instead of returning False. Defaults to False.

    Returns:
    bool: True when the line starts with the header, False otherwise.

    Raises:
    - Exception: On a mismatch when strict is True.
    """
    def squash(text):
        # Canonical form used for the comparison: lower case, no spaces.
        return text.lower().replace(' ','')

    if verbose:
        __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
        __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
    if squash(line).startswith(squash(header.strip())):
        return True
    # The mismatch is always reported, regardless of verbose.
    __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
    if strict:
        raise Exception("Data format error! Header mismatch")
    return False
|
|
237
|
+
|
|
238
|
+
def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header = '',encoding = 'utf8',strict = True):
    """
    Make sure a TSV file is present, optionally creating it.

    Parameters:
    - fileName (str): The path of the TSV file.
    - createIfNotExist (bool, optional): Create the file (header line only) when it is missing. Defaults to True.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
    - header (str, optional): The header written as the first line of a newly created file. Defaults to ''.
    - encoding (str, optional): The encoding used when creating the file. Defaults to 'utf8'.
    - strict (bool, optional): Raise when the file is missing and is not created. Defaults to True.

    Returns:
    bool: True if the file exists or was just created, False if it is missing
        and neither creation nor strict mode was requested.

    Raises:
    - Exception: If the file is missing, createIfNotExist is False and strict is True.
    """
    # A non-.tsv name only produces a warning; processing continues.
    if not fileName.endswith('.tsv'):
        __teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
    if os.path.isfile(fileName):
        return True
    if not createIfNotExist:
        if not strict:
            return False
        __teePrintOrNot('File not found','error',teeLogger=teeLogger)
        raise Exception("File not found")
    with open(fileName, mode ='w',encoding=encoding) as newFile:
        newFile.write(header+'\n')
    __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
    return True
|
|
267
|
+
|
|
141
268
|
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
|
|
142
269
|
"""
|
|
143
270
|
Read a TSV (Tab-Separated Values) file and return the data as a dictionary.
|
|
@@ -150,7 +277,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
150
277
|
- lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
|
|
151
278
|
- verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
|
|
152
279
|
- verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
153
|
-
- taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to
|
|
280
|
+
- taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
|
|
154
281
|
- encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
|
|
155
282
|
- strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
|
|
156
283
|
|
|
@@ -162,36 +289,19 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
162
289
|
|
|
163
290
|
"""
|
|
164
291
|
if taskDic is None:
|
|
165
|
-
taskDic =
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
header += '\n'
|
|
170
|
-
if not os.path.isfile(fileName):
|
|
171
|
-
if createIfNotExist:
|
|
172
|
-
with open(fileName, mode ='w',encoding=encoding)as file:
|
|
173
|
-
file.write(header)
|
|
174
|
-
__teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
|
|
175
|
-
verifyHeader = True
|
|
176
|
-
else:
|
|
177
|
-
__teePrintOrNot('File not found','error',teeLogger=teeLogger)
|
|
178
|
-
raise Exception("File not found")
|
|
292
|
+
taskDic = {}
|
|
293
|
+
header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
|
|
294
|
+
if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
|
|
295
|
+
return taskDic
|
|
179
296
|
with open(fileName, mode ='rb')as file:
|
|
297
|
+
correctColumnNum = -1
|
|
180
298
|
if header.strip():
|
|
181
299
|
if verifyHeader:
|
|
182
300
|
line = file.readline().decode().strip()
|
|
183
|
-
if verbose:
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
|
|
188
|
-
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
|
|
189
|
-
raise Exception("Data format error! Header mismatch")
|
|
190
|
-
correctColumnNum = len(header.strip().split('\t'))
|
|
191
|
-
if verbose:
|
|
192
|
-
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
193
|
-
else:
|
|
194
|
-
correctColumnNum = -1
|
|
301
|
+
if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
302
|
+
correctColumnNum = len(header.strip().split('\t'))
|
|
303
|
+
if verbose:
|
|
304
|
+
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
195
305
|
if lastLineOnly:
|
|
196
306
|
lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
|
|
197
307
|
if lineCache:
|
|
@@ -201,27 +311,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
201
311
|
correctColumnNum, lineCache = processLine(line,taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict)
|
|
202
312
|
return taskDic
|
|
203
313
|
|
|
204
|
-
def
|
|
205
|
-
"""
|
|
206
|
-
Prints the given message or logs it using the provided teeLogger.
|
|
207
|
-
|
|
208
|
-
Parameters:
|
|
209
|
-
message (str): The message to be printed or logged.
|
|
210
|
-
level (str, optional): The log level. Defaults to 'info'.
|
|
211
|
-
teeLogger (object, optional): The logger object used for logging. Defaults to None.
|
|
212
|
-
|
|
213
|
-
Returns:
|
|
214
|
-
None
|
|
215
|
-
"""
|
|
216
|
-
try:
|
|
217
|
-
if teeLogger:
|
|
218
|
-
teeLogger.teelog(message,level)
|
|
219
|
-
else:
|
|
220
|
-
print(message)
|
|
221
|
-
except Exception as e:
|
|
222
|
-
print(message)
|
|
223
|
-
|
|
224
|
-
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8'):
|
|
314
|
+
def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True):
|
|
225
315
|
"""
|
|
226
316
|
Append a line of data to a TSV file.
|
|
227
317
|
Parameters:
|
|
@@ -233,41 +323,26 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
233
323
|
- verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
|
|
234
324
|
- verbose (bool, optional): If True, additional information will be printed during the execution.
|
|
235
325
|
- encoding (str, optional): The encoding of the file.
|
|
326
|
+
- strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
|
|
236
327
|
Raises:
|
|
237
328
|
- Exception: If the file does not exist and createIfNotExist is False.
|
|
238
329
|
- Exception: If the existing header does not match the provided header.
|
|
239
330
|
"""
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
if createIfNotExist:
|
|
244
|
-
with open(fileName, mode ='w',encoding=encoding)as file:
|
|
245
|
-
file.write(header)
|
|
246
|
-
__teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
|
|
247
|
-
verifyHeader = True
|
|
248
|
-
else:
|
|
249
|
-
__teePrintOrNot('File not found','error',teeLogger=teeLogger)
|
|
250
|
-
raise Exception("File not found")
|
|
251
|
-
|
|
331
|
+
header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
|
|
332
|
+
if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
|
|
333
|
+
return
|
|
252
334
|
if type(lineToAppend) == str:
|
|
253
335
|
lineToAppend = lineToAppend.strip().split('\t')
|
|
254
336
|
|
|
255
337
|
with open(fileName, mode ='r+b')as file:
|
|
338
|
+
correctColumnNum = len(lineToAppend)
|
|
256
339
|
if header.strip():
|
|
257
340
|
if verifyHeader:
|
|
258
341
|
line = file.readline().decode().strip()
|
|
259
|
-
if verbose:
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
|
|
264
|
-
__teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
|
|
265
|
-
raise Exception("Data format error! Header mismatch")
|
|
266
|
-
correctColumnNum = len(header.strip().split('\t'))
|
|
267
|
-
if verbose:
|
|
268
|
-
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
269
|
-
else:
|
|
270
|
-
correctColumnNum = len(lineToAppend)
|
|
342
|
+
if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
|
|
343
|
+
correctColumnNum = len(header.strip().split('\t'))
|
|
344
|
+
if verbose:
|
|
345
|
+
__teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
|
|
271
346
|
# truncate / fill the lineToAppend to the correct number of columns
|
|
272
347
|
if len(lineToAppend) < correctColumnNum:
|
|
273
348
|
lineToAppend += ['']*(correctColumnNum-len(lineToAppend))
|
|
@@ -281,6 +356,40 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
|
|
|
281
356
|
if verbose:
|
|
282
357
|
__teePrintOrNot(f"Appended {lineToAppend} to {fileName}",teeLogger=teeLogger)
|
|
283
358
|
|
|
359
|
+
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False):
    """
    Clear the contents of a TSV file, keeping only a header line. Will create the file if it does not exist.

    Parameters:
    - fileName (str): The path of the TSV file.
    - teeLogger (optional): A logger object for logging messages.
    - header (str or list, optional): The header line. When verification is off it is written as the new first line.
    - verifyHeader (bool, optional): If True (and a header is given), the existing first line is compared with
        the header and is kept in the file; only the lines after it are removed. If False, the file is
        rewritten to contain just the provided header line.
    - verbose (bool, optional): If True, additional information will be printed during the execution.
    - encoding (str, optional): The encoding of the file.
    - strict (bool, optional): If True, a header mismatch raises an exception. If False, a warning is printed
        and the original first line is kept.

    Raises:
    - Exception: If the file still does not exist after the creation attempt.
    - Exception: If strict is True and the existing header does not match the provided header.
    """
    header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
    if not verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False):
        raise Exception("Something catastrophic happened! File still not found after creation")
    with open(fileName, mode ='r+',encoding=encoding) as file:
        if header.strip() and verifyHeader:
            # Reading the first line leaves the position right after it.
            line = file.readline().strip()
            if not lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
                __teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
        else:
            file.write(header+'\n')
        # Drop everything after the current position in BOTH branches.
        # Writing the header in 'r+' mode only overwrites the first bytes;
        # without truncating, the old records would stay in the file.
        file.truncate()
    if verbose:
        __teePrintOrNot(f"Cleared {fileName}",teeLogger=teeLogger)
|
|
385
|
+
|
|
386
|
+
def getFileUpdateTimeNs(fileName):
    """
    Get the last modification time of a file in nanoseconds.

    Parameters:
    - fileName (str): The path of the file.

    Returns:
    int: os.stat(fileName).st_mtime_ns, or the current time.time_ns() when
        the file cannot be stat'ed (the failure is reported as an error message).
    """
    try:
        return os.stat(fileName).st_mtime_ns
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        __teePrintOrNot(f"Failed to get file update time for {fileName}",'error')
        return time.time_ns()
|
|
392
|
+
|
|
284
393
|
# create a TSV class that functions like an ordered dictionary but will update the file when modified
|
|
285
394
|
class TSVZed(OrderedDict):
|
|
286
395
|
def __teePrintOrNot(self,message,level = 'info'):
|
|
@@ -288,16 +397,18 @@ class TSVZed(OrderedDict):
|
|
|
288
397
|
if self.teeLogger:
|
|
289
398
|
self.teeLogger.teelog(message,level)
|
|
290
399
|
else:
|
|
291
|
-
print(message)
|
|
400
|
+
print(message,flush=True)
|
|
292
401
|
except Exception as e:
|
|
293
|
-
print(message)
|
|
402
|
+
print(message,flush=True)
|
|
294
403
|
|
|
295
404
|
def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = None):
|
|
296
405
|
super().__init__()
|
|
297
406
|
self.version = version
|
|
407
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
|
|
408
|
+
self.lastUpdateTime = self.externalFileUpdateTime
|
|
298
409
|
self._fileName = fileName
|
|
299
410
|
self.teeLogger = teeLogger
|
|
300
|
-
self.header =
|
|
411
|
+
self.header = formatHeader(header,verbose = verbose,teeLogger = self.teeLogger)
|
|
301
412
|
self.correctColumnNum = -1
|
|
302
413
|
self.createIfNotExist = createIfNotExist
|
|
303
414
|
self.verifyHeader = verifyHeader
|
|
@@ -305,6 +416,8 @@ class TSVZed(OrderedDict):
|
|
|
305
416
|
self.rewrite_on_exit = rewrite_on_exit
|
|
306
417
|
self.rewrite_interval = rewrite_interval
|
|
307
418
|
self.monitor_external_changes = monitor_external_changes
|
|
419
|
+
if not monitor_external_changes:
|
|
420
|
+
self.__teePrintOrNot(f"Warning: External changes monitoring disabled for {self._fileName}. Will overwrite external changes.",'warning')
|
|
308
421
|
self.verbose = verbose
|
|
309
422
|
if append_check_delay < 0:
|
|
310
423
|
append_check_delay = 0.00001
|
|
@@ -345,6 +458,8 @@ class TSVZed(OrderedDict):
|
|
|
345
458
|
#super().update(loadedData)
|
|
346
459
|
if self.verbose:
|
|
347
460
|
self.__teePrintOrNot(f"TSVZed({self._fileName}) loaded")
|
|
461
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
462
|
+
self.lastUpdateTime = self.externalFileUpdateTime
|
|
348
463
|
self.memoryOnly = mo
|
|
349
464
|
return self
|
|
350
465
|
|
|
@@ -388,6 +503,7 @@ class TSVZed(OrderedDict):
|
|
|
388
503
|
if self.verbose:
|
|
389
504
|
self.__teePrintOrNot(f"Appending {key} to the appendQueue")
|
|
390
505
|
self.appendQueue.append('\t'.join(value))
|
|
506
|
+
self.lastUpdateTime = time.time_ns()
|
|
391
507
|
# if not self.appendThread.is_alive():
|
|
392
508
|
# self.commitAppendToFile()
|
|
393
509
|
# else:
|
|
@@ -405,6 +521,7 @@ class TSVZed(OrderedDict):
|
|
|
405
521
|
if self.memoryOnly:
|
|
406
522
|
return
|
|
407
523
|
self.__appendEmptyLine(key)
|
|
524
|
+
self.lastUpdateTime = time.time_ns()
|
|
408
525
|
|
|
409
526
|
def __appendEmptyLine(self,key):
|
|
410
527
|
self.dirty = True
|
|
@@ -428,6 +545,7 @@ class TSVZed(OrderedDict):
|
|
|
428
545
|
if self.memoryOnly:
|
|
429
546
|
return self
|
|
430
547
|
self.clear_file()
|
|
548
|
+
self.lastUpdateTime = self.externalFileUpdateTime
|
|
431
549
|
return self
|
|
432
550
|
|
|
433
551
|
def clear_file(self):
|
|
@@ -448,6 +566,7 @@ class TSVZed(OrderedDict):
|
|
|
448
566
|
self.dirty = False
|
|
449
567
|
self.deSynced = False
|
|
450
568
|
except Exception as e:
|
|
569
|
+
self.release_file_obj(file)
|
|
451
570
|
self.__teePrintOrNot(f"Failed to write at clear_file() to {self._fileName}: {e}",'error')
|
|
452
571
|
import traceback
|
|
453
572
|
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
@@ -495,6 +614,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
495
614
|
key, value = super().popitem(last)
|
|
496
615
|
if not self.memoryOnly:
|
|
497
616
|
self.__appendEmptyLine(key)
|
|
617
|
+
self.lastUpdateTime = time.time_ns()
|
|
498
618
|
return key, value
|
|
499
619
|
|
|
500
620
|
__marker = object()
|
|
@@ -512,6 +632,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
512
632
|
value = super().pop(key)
|
|
513
633
|
if not self.memoryOnly:
|
|
514
634
|
self.__appendEmptyLine(key)
|
|
635
|
+
self.lastUpdateTime = time.time_ns()
|
|
515
636
|
return value
|
|
516
637
|
|
|
517
638
|
def move_to_end(self, key, last=True):
|
|
@@ -526,6 +647,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
526
647
|
self.__teePrintOrNot(f"rewrite_on_exit set to True")
|
|
527
648
|
if self.verbose:
|
|
528
649
|
self.__teePrintOrNot(f"Warning: Trying to move Key {key} moved to {'end' if last else 'beginning'} Need to resync for changes to apply to disk")
|
|
650
|
+
self.lastUpdateTime = time.time_ns()
|
|
529
651
|
return self
|
|
530
652
|
|
|
531
653
|
@classmethod
|
|
@@ -539,23 +661,29 @@ memoryOnly:{self.memoryOnly}
|
|
|
539
661
|
|
|
540
662
|
|
|
541
663
|
def rewrite(self,force = False,reloadInternalFromFile = None):
|
|
542
|
-
if not self.dirty and not force:
|
|
543
|
-
return False
|
|
544
664
|
if not self.deSynced and not force:
|
|
665
|
+
if not self.dirty:
|
|
666
|
+
return False
|
|
545
667
|
if self.rewrite_interval == 0 or time.time() - os.path.getmtime(self._fileName) < self.rewrite_interval:
|
|
546
668
|
return False
|
|
547
669
|
try:
|
|
548
|
-
|
|
549
|
-
self.__teePrintOrNot(f"Rewriting {self._fileName}")
|
|
670
|
+
|
|
550
671
|
if reloadInternalFromFile is None:
|
|
551
672
|
reloadInternalFromFile = self.monitor_external_changes
|
|
552
|
-
if reloadInternalFromFile:
|
|
673
|
+
if reloadInternalFromFile and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
553
674
|
# this will be needed if more than 1 process is accessing the file
|
|
554
675
|
self.commitAppendToFile()
|
|
555
676
|
self.reload()
|
|
556
|
-
self.
|
|
557
|
-
|
|
558
|
-
|
|
677
|
+
if self.memoryOnly:
|
|
678
|
+
if self.verbose:
|
|
679
|
+
self.__teePrintOrNot(f"Memory only mode. Map to file skipped.")
|
|
680
|
+
return False
|
|
681
|
+
if self.dirty:
|
|
682
|
+
if self.verbose:
|
|
683
|
+
self.__teePrintOrNot(f"Rewriting {self._fileName}")
|
|
684
|
+
self.mapToFile()
|
|
685
|
+
if self.verbose:
|
|
686
|
+
self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
|
|
559
687
|
if not self.appendThread.is_alive():
|
|
560
688
|
self.commitAppendToFile()
|
|
561
689
|
# else:
|
|
@@ -568,8 +696,10 @@ memoryOnly:{self.memoryOnly}
|
|
|
568
696
|
self.deSynced = True
|
|
569
697
|
return False
|
|
570
698
|
|
|
571
|
-
def
|
|
699
|
+
def oldMapToFile(self):
|
|
572
700
|
try:
|
|
701
|
+
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
702
|
+
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
573
703
|
file = self.get_file_obj('w')
|
|
574
704
|
if self.header:
|
|
575
705
|
file.write(self.header+'\n')
|
|
@@ -582,14 +712,83 @@ memoryOnly:{self.memoryOnly}
|
|
|
582
712
|
self.dirty = False
|
|
583
713
|
self.deSynced = False
|
|
584
714
|
except Exception as e:
|
|
585
|
-
self.
|
|
715
|
+
self.release_file_obj(file)
|
|
716
|
+
self.__teePrintOrNot(f"Failed to write at oldMapToFile() to {self._fileName}: {e}",'error')
|
|
717
|
+
import traceback
|
|
718
|
+
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
719
|
+
self.deSynced = True
|
|
720
|
+
return self
|
|
721
|
+
|
|
722
|
+
def mapToFile(self):
|
|
723
|
+
try:
|
|
724
|
+
if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
|
|
725
|
+
self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
|
|
726
|
+
file = self.get_file_obj('r+')
|
|
727
|
+
overWrite = False
|
|
728
|
+
line = file.readline()
|
|
729
|
+
aftPos = file.tell()
|
|
730
|
+
if self.header and not lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
|
|
731
|
+
file.seek(0)
|
|
732
|
+
file.write(self.header+'\n')
|
|
733
|
+
# if the header is not the same length as the line, we need to overwrite the file
|
|
734
|
+
if aftPos != file.tell():
|
|
735
|
+
overWrite = True
|
|
736
|
+
if self.verbose:
|
|
737
|
+
self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
|
|
738
|
+
for value in self.values():
|
|
739
|
+
strToWrite = '\t'.join(value)+'\n'
|
|
740
|
+
if overWrite:
|
|
741
|
+
if self.verbose:
|
|
742
|
+
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
743
|
+
file.write(strToWrite)
|
|
744
|
+
continue
|
|
745
|
+
pos = file.tell()
|
|
746
|
+
line = file.readline()
|
|
747
|
+
aftPos = file.tell()
|
|
748
|
+
if not line or pos == aftPos:
|
|
749
|
+
if self.verbose:
|
|
750
|
+
self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
|
|
751
|
+
file.write(strToWrite)
|
|
752
|
+
overWrite = True
|
|
753
|
+
continue
|
|
754
|
+
if line != strToWrite:
|
|
755
|
+
if self.verbose:
|
|
756
|
+
self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
|
|
757
|
+
file.seek(pos)
|
|
758
|
+
# fill the string with space to write to the correct length
|
|
759
|
+
file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
|
|
760
|
+
if aftPos != file.tell():
|
|
761
|
+
overWrite = True
|
|
762
|
+
file.truncate()
|
|
763
|
+
self.release_file_obj(file)
|
|
764
|
+
if self.verbose:
|
|
765
|
+
self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
|
|
766
|
+
self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
|
|
767
|
+
self.dirty = False
|
|
768
|
+
self.deSynced = False
|
|
769
|
+
except Exception as e:
|
|
770
|
+
self.release_file_obj(file)
|
|
771
|
+
self.__teePrintOrNot(f"Failed to write at mapToFile() to {self._fileName}: {e}",'error')
|
|
586
772
|
import traceback
|
|
587
773
|
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
588
774
|
self.deSynced = True
|
|
589
775
|
return self
|
|
776
|
+
|
|
777
|
+
def checkExternalChanges(self):
    """
    Compare the file's modification time with the last one recorded.

    Nothing is checked while the object is already marked as deSynced.
    A newer file timestamp marks the object as deSynced. An older one is
    reported as a time anomaly and the recorded timestamp is reset to the
    file's current value.

    Returns:
    self
    """
    if not self.deSynced:
        currentFileTime = getFileUpdateTimeNs(self._fileName)
        if self.externalFileUpdateTime < currentFileTime:
            # The file changed after our last recorded write / load.
            self.deSynced = True
            self.__teePrintOrNot(f"External changes detected in {self._fileName}")
        elif self.externalFileUpdateTime > currentFileTime:
            self.__teePrintOrNot(f"Time anomalies detected in {self._fileName}, resetting externalFileUpdateTime")
            self.externalFileUpdateTime = currentFileTime
    return self
|
|
590
788
|
|
|
591
789
|
def _appendWorker(self):
|
|
592
790
|
while not self.shutdownEvent.is_set():
|
|
791
|
+
self.checkExternalChanges()
|
|
593
792
|
self.rewrite()
|
|
594
793
|
self.commitAppendToFile()
|
|
595
794
|
time.sleep(self.append_check_delay)
|
|
@@ -601,6 +800,11 @@ memoryOnly:{self.memoryOnly}
|
|
|
601
800
|
|
|
602
801
|
def commitAppendToFile(self):
|
|
603
802
|
if self.appendQueue:
|
|
803
|
+
if self.memoryOnly:
|
|
804
|
+
self.appendQueue.clear()
|
|
805
|
+
if self.verbose:
|
|
806
|
+
self.__teePrintOrNot(f"Memory only mode. Append queue cleared.")
|
|
807
|
+
return self
|
|
604
808
|
try:
|
|
605
809
|
if self.verbose:
|
|
606
810
|
self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
|
|
@@ -614,6 +818,7 @@ memoryOnly:{self.memoryOnly}
|
|
|
614
818
|
self.__teePrintOrNot(f"Records commited to {self._fileName}")
|
|
615
819
|
self.__teePrintOrNot(f"After size of {self._fileName}: {os.path.getsize(self._fileName)}")
|
|
616
820
|
except Exception as e:
|
|
821
|
+
self.release_file_obj(file)
|
|
617
822
|
self.__teePrintOrNot(f"Failed to write at commitAppendToFile to {self._fileName}: {e}",'error')
|
|
618
823
|
import traceback
|
|
619
824
|
self.__teePrintOrNot(traceback.format_exc(),'error')
|
|
@@ -648,12 +853,20 @@ memoryOnly:{self.memoryOnly}
|
|
|
648
853
|
if self.verbose:
|
|
649
854
|
self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
|
|
650
855
|
except Exception as e:
|
|
651
|
-
|
|
652
|
-
|
|
856
|
+
try:
|
|
857
|
+
self.writeLock.release() # Release the thread lock in case of an error
|
|
858
|
+
except Exception as e:
|
|
859
|
+
self.__teePrintOrNot(f"Failed to release writeLock for {self._fileName}: {e}",'error')
|
|
860
|
+
self.__teePrintOrNot(f"Failed to open file {self._fileName}: {e}",'error')
|
|
653
861
|
return file
|
|
654
862
|
|
|
655
863
|
def release_file_obj(self,file):
|
|
864
|
+
# if write lock is already released, return
|
|
865
|
+
if not self.writeLock.locked():
|
|
866
|
+
return
|
|
656
867
|
try:
|
|
868
|
+
file.flush() # Ensure the file is flushed before unlocking
|
|
869
|
+
os.fsync(file.fileno()) # Ensure the file is synced to disk before unlocking
|
|
657
870
|
if os.name == 'posix':
|
|
658
871
|
fcntl.lockf(file, fcntl.LOCK_UN)
|
|
659
872
|
elif os.name == 'nt':
|
|
@@ -664,6 +877,49 @@ memoryOnly:{self.memoryOnly}
|
|
|
664
877
|
if self.verbose:
|
|
665
878
|
self.__teePrintOrNot(f"File {file.name} unlocked / released")
|
|
666
879
|
except Exception as e:
|
|
667
|
-
|
|
880
|
+
self.__teePrintOrNot(f"Failed to release file {file.name}: {e}",'error')
|
|
668
881
|
finally:
|
|
669
|
-
|
|
882
|
+
try:
|
|
883
|
+
self.writeLock.release() # Ensure the thread lock is always released
|
|
884
|
+
except Exception as e:
|
|
885
|
+
self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
|
|
886
|
+
self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def __main__():
    """Command-line entry point: parse the arguments and run one TSV operation.

    Positional arguments are the file name, an optional operation
    (``read`` by default, or ``append`` / ``delete`` / ``clear``) and the
    fields of the line to write.

    * ``read``   - print the file as a formatted table, or a "File not found"
      message when the path is not an existing file.
    * ``append`` - hand all given fields to ``appendTSV``.
    * ``delete`` - hand only the first field (the key) to ``appendTSV``.
    * ``clear``  - call ``clearTSV`` on the file.

    ``--force`` turns off the strict / header-verification flags passed to the
    helpers. Returns ``None``.
    """
    import argparse

    cli = argparse.ArgumentParser(description='TSVZed: A TSV file manager')
    cli.add_argument('filename', type=str, help='The TSV file to read')
    cli.add_argument(
        'operation',
        type=str,
        nargs='?',
        default='read',
        choices=['read','append','delete','clear'],
        help='The operation to perform. Default: read',
    )
    cli.add_argument(
        'line',
        type=str,
        nargs='*',
        help='The line to append to the TSV file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.',
    )
    cli.add_argument(
        '-c', '--header',
        type=str,
        help='Perform checks with this header of the TSV file. seperate using \\t',
    )
    cli.add_argument(
        '-f', '--force',
        action='store_true',
        help='Force the operation. Ignore checks for column numbers / headers',
    )
    cli.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
    cli.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
    opts = cli.parse_args()

    # The header arrives with a literal backslash-t between columns; convert to real tabs.
    header = opts.header.replace('\\t','\t') if opts.header else ''
    enforceChecks = not opts.force
    target = opts.filename
    operation = opts.operation

    if operation == 'read':
        # Reading never creates the file, so bail out early when it is absent.
        if not os.path.isfile(target):
            print(f"File not found: {target}")
            return
        table = readTSV(target, verifyHeader=False, verbose=opts.verbose, strict=enforceChecks)
        print(pretty_format_table(table.values()))
        return

    if operation in ('append', 'delete'):
        # A delete is written as an append that carries only the key field.
        fields = opts.line if operation == 'append' else opts.line[:1]
        appendTSV(target, fields, createIfNotExist=True, header=header, verbose=opts.verbose, strict=enforceChecks)
        return

    if operation == 'clear':
        clearTSV(target, header=header, verbose=opts.verbose, verifyHeader=enforceChecks)
        return

    # argparse's ``choices`` should already reject anything else; kept as a safeguard.
    print("Invalid operation")
    return
|
|
920
|
+
|
|
921
|
+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    __main__()
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
|
TSVZ-2.57.dist-info/METADATA
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: TSVZ
|
|
3
|
-
Version: 2.57
|
|
4
|
-
Summary: An simple in memory wrapper around a TSV file to function as a database
|
|
5
|
-
Home-page: https://github.com/yufei-pan/TSVZ
|
|
6
|
-
Author: Yufei Pan
|
|
7
|
-
Author-email: pan@zopyr.us
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.6
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
|
|
15
|
-
This lib provides some helper functions to interact with tsv ( tab separated values ) files.
|
|
16
|
-
|
|
17
|
-
TSVZ can also function like an in memory DB that is able to perform non-blocking read / write to TSV files.
|
TSVZ-2.57.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
TSVZ.py,sha256=pLi2pADqECVnafOzl8tZSQnytATuCdYUgZwwqm7vIm4,29673
|
|
2
|
-
TSVZ-2.57.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
TSVZ-2.57.dist-info/METADATA,sha256=UudrakeV_BECAS6aFeEZ8n5HBvyc0aFlKiPYzNdRB_E,703
|
|
4
|
-
TSVZ-2.57.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
|
5
|
-
TSVZ-2.57.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
|
|
6
|
-
TSVZ-2.57.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|