TSVZ 2.57__py3-none-any.whl → 2.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.1
2
+ Name: TSVZ
3
+ Version: 2.65
4
+ Summary: A simple in-memory wrapper around a TSV file to function as a database
5
+ Home-page: https://github.com/yufei-pan/TSVZ
6
+ Author: Yufei Pan
7
+ Author-email: pan@zopyr.us
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+
15
+ This lib provides some helper functions to interact with TSV (tab-separated values) files.
16
+
17
+ TSVZ can also function like an in-memory DB that is able to perform non-blocking reads / writes to TSV files.
18
+
19
+ Import as a lib or use console tool:
20
+
21
+ ```bash
22
+ tsvz -h
23
+ ```
24
+
25
+ ```bash
26
+ TSVZ -h
27
+ ```
28
+
29
+ ```bash
30
+ usage: TSVZ [-h] [-c HEADER] [-f] [-v] [-V] filename [{read,append,delete,clear}] [line ...]
31
+
32
+ TSVZed: A TSV file manager
33
+
34
+ positional arguments:
35
+ filename The TSV file to read
36
+ {read,append,delete,clear}
37
+ The operation to perform. Default: read
38
+ line The line to append to the TSV file. it follows as : \{key\} \{value1\} \{value2\} ... if a key without value be
39
+ inserted, the value will get deleted.
40
+
41
+ options:
42
+ -h, --help show this help message and exit
43
+ -c HEADER, --header HEADER
44
+ Perform checks with this header of the TSV file. seperate using \t
45
+ -f, --force Force the operation. Ignore checks for column numbers / headers
46
+ -v, --verbose Print verbose output
47
+ -V, --version show program's version number and exit
48
+ ```
@@ -0,0 +1,7 @@
1
+ TSVZ.py,sha256=tDsAV_zNosNEn5-FifNSgacdr9T-6zsQuZPZFkVakIc,42423
2
+ TSVZ-2.65.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
+ TSVZ-2.65.dist-info/METADATA,sha256=NxGY5k380-pEPwsTqQdUo3F52jiH8QexSbLcGN4Me94,1689
4
+ TSVZ-2.65.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
5
+ TSVZ-2.65.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
6
+ TSVZ-2.65.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
7
+ TSVZ-2.65.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ TSVZ = TSVZ:__main__
3
+ tsvz = TSVZ:__main__
TSVZ.py CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- import os
2
+ import os , sys
3
3
  from collections import OrderedDict , deque
4
4
  import time
5
5
  import atexit
@@ -10,7 +10,53 @@ if os.name == 'nt':
10
10
  elif os.name == 'posix':
11
11
  import fcntl
12
12
 
13
- version = '2.57'
13
+ version = '2.65'
14
+ author = 'pan@zopyr.us'
15
+
16
+
17
def pretty_format_table(data):
    """
    Render tabular data as an aligned, pipe separated text table.

    The first row is treated as the header and is followed by a dashed separator line.
    The header row defines the number of columns: shorter rows are padded with empty
    cells and extra cells of longer rows are dropped.

    Parameters:
    - data (str or iterable): Either a TSV formatted string (rows separated by newlines,
      cells separated by tabs) or an iterable of row sequences.

    Returns:
    str or None: The formatted table terminated by a newline, or None if there is nothing to format.
    """
    if not data:
        return None
    if isinstance(data, str):
        # split every line into its tab separated cells so that columns are cells, not characters
        rows = [line.split('\t') for line in data.strip('\n').split('\n')]
    else:
        rows = [list(row) for row in data]
    if not rows:
        # a truthy iterable ( e.g. a generator ) may still yield nothing
        return None
    num_cols = len(rows[0])
    # stringify every cell, pad short rows and drop cells beyond the header width
    rows = [[str(cell) for cell in row[:num_cols]] + [''] * (num_cols - len(row)) for row in rows]
    # Calculate the maximum width of each column
    col_widths = [max(len(row[c]) for row in rows) for c in range(num_cols)]
    # Build the row format string
    row_format = ' | '.join('{{:<{}}}'.format(width) for width in col_widths)
    outTable = [row_format.format(*rows[0])]
    outTable.append('-+-'.join('-' * width for width in col_widths))
    outTable.extend(row_format.format(*row) for row in rows[1:])
    return '\n'.join(outTable) + '\n'
40
+
41
def __teePrintOrNot(message,level = 'info',teeLogger = None):
    """
    Send a message to the tee logger when one is given, otherwise print it.

    Any failure while logging falls back to printing the message to stdout.

    Parameters:
        message (str): The message to be printed or logged.
        level (str, optional): The log level handed to the logger. Defaults to 'info'.
        teeLogger (object, optional): Logger exposing a teelog(message, level) method. Defaults to None.

    Returns:
        None
    """
    try:
        if not teeLogger:
            print(message,flush=True)
            return
        teeLogger.teelog(message,level)
    except Exception:
        # best effort: never let a logging problem hide the message
        print(message,flush=True)
14
60
 
15
61
  def processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True):
16
62
  """
@@ -138,6 +184,87 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
138
184
  # Return empty list if no valid line found
139
185
  return last_valid_line
140
186
 
187
def formatHeader(header,verbose = False,teeLogger = None):
    """
    Format the header into a single stripped, tab separated string.

    Parameters:
    - header (str or list): The header string or an iterable of column names to format.
    - verbose (bool, optional): Whether to report an invalid header. Defaults to False.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.

    Returns:
    str: The formatted header string without surrounding whitespace. Empty if the header
         is neither a string nor something that can be joined with tabs.
    """
    # isinstance keeps str subclasses from being joined character by character
    if not isinstance(header, str):
        try:
            header = '\t'.join(header)
        except Exception:
            # not iterable ( e.g. None ) or contains non string items
            if verbose:
                __teePrintOrNot('Invalid header, setting header to empty.','error',teeLogger=teeLogger)
            header = ''
    return header.strip()
213
+
214
def lineContainHeader(header,line,verbose = False,teeLogger = None,strict = False):
    """
    Check whether a line starts with the given header.

    The comparison ignores letter case and space characters.

    Parameters:
    - header (str): The expected header string.
    - line (str): The line to compare against the header.
    - verbose (bool, optional): Whether to print the header and the line being compared. Defaults to False.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
    - strict (bool, optional): Whether to raise an exception on a mismatch. Defaults to False.

    Returns:
    bool: True if the line starts with the header, False otherwise.

    Raises:
    - Exception: If the header does not match and strict is True.
    """
    expectedHeader = header.strip()
    if verbose:
        __teePrintOrNot(f"Header: {expectedHeader}",teeLogger=teeLogger)
        __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
    normalizedLine = line.lower().replace(' ','')
    normalizedHeader = expectedHeader.lower().replace(' ','')
    if normalizedLine.startswith(normalizedHeader):
        return True
    # the mismatch is reported regardless of the verbose flag
    __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{expectedHeader}",teeLogger=teeLogger)
    if strict:
        raise Exception("Data format error! Header mismatch")
    return False
237
+
238
def verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = None,header = '',encoding = 'utf8',strict = True):
    """
    Verify the existence of a TSV file, optionally creating it.

    Parameters:
    - fileName (str): The path of the TSV file.
    - createIfNotExist (bool, optional): Whether to create the file if it doesn't exist. Defaults to True.
    - teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
    - header (str, optional): The header line written to a newly created file. Defaults to ''.
    - encoding (str, optional): The encoding used when creating the file. Defaults to 'utf8'.
    - strict (bool, optional): Whether to raise an exception if the file is missing and not created. Defaults to True.

    Returns:
    bool: True if the file exists or was just created, False if it is missing and strict is False.

    Raises:
    - FileNotFoundError: If the file is missing, createIfNotExist is False and strict is True.
    """
    if not fileName.endswith('.tsv'):
        __teePrintOrNot(f'Warning: Filename {fileName} does not end with .tsv','warning',teeLogger=teeLogger)
    if os.path.isfile(fileName):
        return True
    if createIfNotExist:
        # a new file always starts with the header line ( an empty line if there is no header )
        with open(fileName, mode ='w',encoding=encoding)as file:
            file.write(header+'\n')
        __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
        return True
    if strict:
        __teePrintOrNot('File not found','error',teeLogger=teeLogger)
        # FileNotFoundError is a subclass of Exception, existing handlers keep working
        raise FileNotFoundError("File not found")
    return False
267
+
141
268
  def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True):
142
269
  """
143
270
  Read a TSV (Tab-Separated Values) file and return the data as a dictionary.
@@ -150,7 +277,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
150
277
  - lastLineOnly (bool, optional): Whether to read only the last valid line of the file. Defaults to False.
151
278
  - verifyHeader (bool, optional): Whether to verify the header of the file. Defaults to True.
152
279
  - verbose (bool, optional): Whether to print verbose output. Defaults to False.
153
- - taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to None.
280
+ - taskDic (OrderedDict, optional): The dictionary to store the data. Defaults to an empty OrderedDict.
154
281
  - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
155
282
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
156
283
 
@@ -162,36 +289,19 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
162
289
 
163
290
  """
164
291
  if taskDic is None:
165
- taskDic = OrderedDict()
166
-
167
- header = header.strip() if type(header) == str else '\t'.join(header)
168
- if not header.endswith('\n'):
169
- header += '\n'
170
- if not os.path.isfile(fileName):
171
- if createIfNotExist:
172
- with open(fileName, mode ='w',encoding=encoding)as file:
173
- file.write(header)
174
- __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
175
- verifyHeader = True
176
- else:
177
- __teePrintOrNot('File not found','error',teeLogger=teeLogger)
178
- raise Exception("File not found")
292
+ taskDic = {}
293
+ header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
294
+ if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
295
+ return taskDic
179
296
  with open(fileName, mode ='rb')as file:
297
+ correctColumnNum = -1
180
298
  if header.strip():
181
299
  if verifyHeader:
182
300
  line = file.readline().decode().strip()
183
- if verbose:
184
- __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
185
- __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
186
- #assert line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')), "Data format error!"
187
- if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
188
- __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
189
- raise Exception("Data format error! Header mismatch")
190
- correctColumnNum = len(header.strip().split('\t'))
191
- if verbose:
192
- __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
193
- else:
194
- correctColumnNum = -1
301
+ if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
302
+ correctColumnNum = len(header.strip().split('\t'))
303
+ if verbose:
304
+ __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
195
305
  if lastLineOnly:
196
306
  lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict)
197
307
  if lineCache:
@@ -201,27 +311,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
201
311
  correctColumnNum, lineCache = processLine(line,taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict)
202
312
  return taskDic
203
313
 
204
- def __teePrintOrNot(message,level = 'info',teeLogger = None):
205
- """
206
- Prints the given message or logs it using the provided teeLogger.
207
-
208
- Parameters:
209
- message (str): The message to be printed or logged.
210
- level (str, optional): The log level. Defaults to 'info'.
211
- teeLogger (object, optional): The logger object used for logging. Defaults to None.
212
-
213
- Returns:
214
- None
215
- """
216
- try:
217
- if teeLogger:
218
- teeLogger.teelog(message,level)
219
- else:
220
- print(message)
221
- except Exception as e:
222
- print(message)
223
-
224
- def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8'):
314
+ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True):
225
315
  """
226
316
  Append a line of data to a TSV file.
227
317
  Parameters:
@@ -233,41 +323,26 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
233
323
  - verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
234
324
  - verbose (bool, optional): If True, additional information will be printed during the execution.
235
325
  - encoding (str, optional): The encoding of the file.
326
+ - strict (bool, optional): If True, the function will raise an exception if there is a data format error. If False, the function will ignore the error and continue.
236
327
  Raises:
237
328
  - Exception: If the file does not exist and createIfNotExist is False.
238
329
  - Exception: If the existing header does not match the provided header.
239
330
  """
240
- if not header.endswith('\n'):
241
- header += '\n'
242
- if not os.path.isfile(fileName):
243
- if createIfNotExist:
244
- with open(fileName, mode ='w',encoding=encoding)as file:
245
- file.write(header)
246
- __teePrintOrNot('Created '+fileName,teeLogger=teeLogger)
247
- verifyHeader = True
248
- else:
249
- __teePrintOrNot('File not found','error',teeLogger=teeLogger)
250
- raise Exception("File not found")
251
-
331
+ header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
332
+ if not verifyTSVExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict):
333
+ return
252
334
  if type(lineToAppend) == str:
253
335
  lineToAppend = lineToAppend.strip().split('\t')
254
336
 
255
337
  with open(fileName, mode ='r+b')as file:
338
+ correctColumnNum = len(lineToAppend)
256
339
  if header.strip():
257
340
  if verifyHeader:
258
341
  line = file.readline().decode().strip()
259
- if verbose:
260
- __teePrintOrNot(f"Header: {header.strip()}",teeLogger=teeLogger)
261
- __teePrintOrNot(f"First line: {line}",teeLogger=teeLogger)
262
- #assert line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')), "Data format error!"
263
- if not line.lower().replace(' ','').startswith(header.strip().lower().replace(' ','')):
264
- __teePrintOrNot(f"Header mismatch: \n{line} \n!= \n{header.strip()}",teeLogger=teeLogger)
265
- raise Exception("Data format error! Header mismatch")
266
- correctColumnNum = len(header.strip().split('\t'))
267
- if verbose:
268
- __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
269
- else:
270
- correctColumnNum = len(lineToAppend)
342
+ if lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
343
+ correctColumnNum = len(header.strip().split('\t'))
344
+ if verbose:
345
+ __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
271
346
  # truncate / fill the lineToAppend to the correct number of columns
272
347
  if len(lineToAppend) < correctColumnNum:
273
348
  lineToAppend += ['']*(correctColumnNum-len(lineToAppend))
@@ -281,6 +356,40 @@ def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExis
281
356
  if verbose:
282
357
  __teePrintOrNot(f"Appended {lineToAppend} to {fileName}",teeLogger=teeLogger)
283
358
 
359
def clearTSV(fileName,teeLogger = None,header = '',verifyHeader = False,verbose = False,encoding = 'utf8',strict = False):
    """
    Clear the contents of a TSV file. Will create if not exist.

    When a header is given and verifyHeader is True the first line already in the file is kept,
    otherwise the given header is written as the first line. Everything after that line is removed.

    Parameters:
    - fileName (str): The path of the TSV file.
    - teeLogger (optional): A logger object for logging messages.
    - header (str, optional): The header line to verify against / to write to the file.
    - verifyHeader (bool, optional): If True, the function will verify if the existing header matches the provided header. If False, the header will not be verified.
    - verbose (bool, optional): If True, additional information will be printed during the execution.
    - encoding (str, optional): The encoding of the file.
    - strict (bool, optional): If True, the function will raise an exception if the existing header does not match. If False, the mismatch is only reported.

    Raises:
    - Exception: If the file still does not exist after trying to create it.
    - Exception: If the header does not match, verifyHeader is True and strict is True.
    """
    header = formatHeader(header,verbose = verbose,teeLogger = teeLogger)
    if not verifyTSVExistence(fileName,createIfNotExist = True,teeLogger = teeLogger,header = header,encoding = encoding,strict = False):
        raise Exception("Something catastrophic happened! File still not found after creation")
    with open(fileName, mode ='r+',encoding=encoding)as file:
        if header.strip() and verifyHeader:
            # readline leaves the position right after the existing first line
            line = file.readline().strip()
            if not lineContainHeader(header,line,verbose = verbose,teeLogger = teeLogger,strict = strict):
                __teePrintOrNot(f'Warning: Header mismatch in {fileName}. Keeping original header in file...','warning',teeLogger)
        else:
            file.write(header+'\n')
        # drop everything after the header line; 'r+' does not truncate on its own,
        # so this is needed in both branches for the file to really be cleared
        file.truncate()
    if verbose:
        __teePrintOrNot(f"Cleared {fileName}",teeLogger=teeLogger)
385
+
386
def getFileUpdateTimeNs(fileName):
    """
    Get the last modification time of a file in nanoseconds.

    Parameters:
    - fileName (str): The path of the file.

    Returns:
    int: The modification time reported by os.stat in nanoseconds, or the current
         time in nanoseconds if the file cannot be inspected.
    """
    try:
        return os.stat(fileName).st_mtime_ns
    except (OSError, TypeError, ValueError):
        # missing / unreadable file or an invalid path value: fall back to "now"
        __teePrintOrNot(f"Failed to get file update time for {fileName}",'error')
        return time.time_ns()
392
+
284
393
  # create a tsv class that functions like a ordered dictionary but will update the file when modified
285
394
  class TSVZed(OrderedDict):
286
395
  def __teePrintOrNot(self,message,level = 'info'):
@@ -288,16 +397,18 @@ class TSVZed(OrderedDict):
288
397
  if self.teeLogger:
289
398
  self.teeLogger.teelog(message,level)
290
399
  else:
291
- print(message)
400
+ print(message,flush=True)
292
401
  except Exception as e:
293
- print(message)
402
+ print(message,flush=True)
294
403
 
295
404
  def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = None):
296
405
  super().__init__()
297
406
  self.version = version
407
+ self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
408
+ self.lastUpdateTime = self.externalFileUpdateTime
298
409
  self._fileName = fileName
299
410
  self.teeLogger = teeLogger
300
- self.header = header.strip() if type(header) == str else '\t'.join(header)
411
+ self.header = formatHeader(header,verbose = verbose,teeLogger = self.teeLogger)
301
412
  self.correctColumnNum = -1
302
413
  self.createIfNotExist = createIfNotExist
303
414
  self.verifyHeader = verifyHeader
@@ -305,6 +416,8 @@ class TSVZed(OrderedDict):
305
416
  self.rewrite_on_exit = rewrite_on_exit
306
417
  self.rewrite_interval = rewrite_interval
307
418
  self.monitor_external_changes = monitor_external_changes
419
+ if not monitor_external_changes:
420
+ self.__teePrintOrNot(f"Warning: External changes monitoring disabled for {self._fileName}. Will overwrite external changes.",'warning')
308
421
  self.verbose = verbose
309
422
  if append_check_delay < 0:
310
423
  append_check_delay = 0.00001
@@ -345,6 +458,8 @@ class TSVZed(OrderedDict):
345
458
  #super().update(loadedData)
346
459
  if self.verbose:
347
460
  self.__teePrintOrNot(f"TSVZed({self._fileName}) loaded")
461
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
462
+ self.lastUpdateTime = self.externalFileUpdateTime
348
463
  self.memoryOnly = mo
349
464
  return self
350
465
 
@@ -388,6 +503,7 @@ class TSVZed(OrderedDict):
388
503
  if self.verbose:
389
504
  self.__teePrintOrNot(f"Appending {key} to the appendQueue")
390
505
  self.appendQueue.append('\t'.join(value))
506
+ self.lastUpdateTime = time.time_ns()
391
507
  # if not self.appendThread.is_alive():
392
508
  # self.commitAppendToFile()
393
509
  # else:
@@ -405,6 +521,7 @@ class TSVZed(OrderedDict):
405
521
  if self.memoryOnly:
406
522
  return
407
523
  self.__appendEmptyLine(key)
524
+ self.lastUpdateTime = time.time_ns()
408
525
 
409
526
  def __appendEmptyLine(self,key):
410
527
  self.dirty = True
@@ -428,6 +545,7 @@ class TSVZed(OrderedDict):
428
545
  if self.memoryOnly:
429
546
  return self
430
547
  self.clear_file()
548
+ self.lastUpdateTime = self.externalFileUpdateTime
431
549
  return self
432
550
 
433
551
  def clear_file(self):
@@ -448,6 +566,7 @@ class TSVZed(OrderedDict):
448
566
  self.dirty = False
449
567
  self.deSynced = False
450
568
  except Exception as e:
569
+ self.release_file_obj(file)
451
570
  self.__teePrintOrNot(f"Failed to write at clear_file() to {self._fileName}: {e}",'error')
452
571
  import traceback
453
572
  self.__teePrintOrNot(traceback.format_exc(),'error')
@@ -495,6 +614,7 @@ memoryOnly:{self.memoryOnly}
495
614
  key, value = super().popitem(last)
496
615
  if not self.memoryOnly:
497
616
  self.__appendEmptyLine(key)
617
+ self.lastUpdateTime = time.time_ns()
498
618
  return key, value
499
619
 
500
620
  __marker = object()
@@ -512,6 +632,7 @@ memoryOnly:{self.memoryOnly}
512
632
  value = super().pop(key)
513
633
  if not self.memoryOnly:
514
634
  self.__appendEmptyLine(key)
635
+ self.lastUpdateTime = time.time_ns()
515
636
  return value
516
637
 
517
638
  def move_to_end(self, key, last=True):
@@ -526,6 +647,7 @@ memoryOnly:{self.memoryOnly}
526
647
  self.__teePrintOrNot(f"rewrite_on_exit set to True")
527
648
  if self.verbose:
528
649
  self.__teePrintOrNot(f"Warning: Trying to move Key {key} moved to {'end' if last else 'beginning'} Need to resync for changes to apply to disk")
650
+ self.lastUpdateTime = time.time_ns()
529
651
  return self
530
652
 
531
653
  @classmethod
@@ -539,23 +661,29 @@ memoryOnly:{self.memoryOnly}
539
661
 
540
662
 
541
663
  def rewrite(self,force = False,reloadInternalFromFile = None):
542
- if not self.dirty and not force:
543
- return False
544
664
  if not self.deSynced and not force:
665
+ if not self.dirty:
666
+ return False
545
667
  if self.rewrite_interval == 0 or time.time() - os.path.getmtime(self._fileName) < self.rewrite_interval:
546
668
  return False
547
669
  try:
548
- if self.verbose:
549
- self.__teePrintOrNot(f"Rewriting {self._fileName}")
670
+
550
671
  if reloadInternalFromFile is None:
551
672
  reloadInternalFromFile = self.monitor_external_changes
552
- if reloadInternalFromFile:
673
+ if reloadInternalFromFile and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
553
674
  # this will be needed if more than 1 process is accessing the file
554
675
  self.commitAppendToFile()
555
676
  self.reload()
556
- self.mapToFile()
557
- if self.verbose:
558
- self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
677
+ if self.memoryOnly:
678
+ if self.verbose:
679
+ self.__teePrintOrNot(f"Memory only mode. Map to file skipped.")
680
+ return False
681
+ if self.dirty:
682
+ if self.verbose:
683
+ self.__teePrintOrNot(f"Rewriting {self._fileName}")
684
+ self.mapToFile()
685
+ if self.verbose:
686
+ self.__teePrintOrNot(f"{len(self)} records rewrote to {self._fileName}")
559
687
  if not self.appendThread.is_alive():
560
688
  self.commitAppendToFile()
561
689
  # else:
@@ -568,8 +696,10 @@ memoryOnly:{self.memoryOnly}
568
696
  self.deSynced = True
569
697
  return False
570
698
 
571
- def mapToFile(self):
699
+ def oldMapToFile(self):
572
700
  try:
701
+ if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
702
+ self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
573
703
  file = self.get_file_obj('w')
574
704
  if self.header:
575
705
  file.write(self.header+'\n')
@@ -582,14 +712,83 @@ memoryOnly:{self.memoryOnly}
582
712
  self.dirty = False
583
713
  self.deSynced = False
584
714
  except Exception as e:
585
- self.__teePrintOrNot(f"Failed to write at dumpToFile() to {self._fileName}: {e}",'error')
715
+ self.release_file_obj(file)
716
+ self.__teePrintOrNot(f"Failed to write at oldMapToFile() to {self._fileName}: {e}",'error')
717
+ import traceback
718
+ self.__teePrintOrNot(traceback.format_exc(),'error')
719
+ self.deSynced = True
720
+ return self
721
+
722
def mapToFile(self):
    """
    Write the in memory records to the file, rewriting only the lines that differ.

    The file is walked line by line next to the records. A record that differs from the
    line at its position is written over that line, padded with spaces to the old line
    length. As soon as a write does not end where the old line ended, or the end of the
    file is reached, the remaining records are written sequentially. The file is
    truncated after the last record.

    On failure the error is reported and the instance is marked as deSynced.

    Returns:
    TSVZed: self
    """
    # bound up front so the except handler can tell whether a file was ever acquired
    file = None
    try:
        if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
            self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
        file = self.get_file_obj('r+')
        overWrite = False
        # only consume the first line as a header if a header is configured,
        # otherwise the first line is a record and has to be compared like any other line
        if self.header:
            line = file.readline()
            aftPos = file.tell()
            if not lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
                file.seek(0)
                file.write(self.header+'\n')
                # if the header is not the same length as the line, we need to overwrite the file
                if aftPos != file.tell():
                    overWrite = True
                if self.verbose:
                    self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
        for value in self.values():
            strToWrite = '\t'.join(value)+'\n'
            if overWrite:
                if self.verbose:
                    self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
                file.write(strToWrite)
                continue
            pos = file.tell()
            line = file.readline()
            aftPos = file.tell()
            if not line or pos == aftPos:
                if self.verbose:
                    self.__teePrintOrNot(f"End of file reached. Appending {value} to {self._fileName}")
                file.write(strToWrite)
                overWrite = True
                continue
            if line != strToWrite:
                if self.verbose:
                    self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
                file.seek(pos)
                # fill the string with space to write to the correct length
                file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
                # the new line did not end where the old one did: later offsets are no longer valid
                if aftPos != file.tell():
                    overWrite = True
        file.truncate()
        self.release_file_obj(file)
        if self.verbose:
            self.__teePrintOrNot(f"{len(self)} records written to {self._fileName}")
            self.__teePrintOrNot(f"File {self._fileName} size: {os.path.getsize(self._fileName)}")
        self.dirty = False
        self.deSynced = False
    except Exception as e:
        # nothing to release if acquiring the file itself failed
        if file is not None:
            self.release_file_obj(file)
        self.__teePrintOrNot(f"Failed to write at mapToFile() to {self._fileName}: {e}",'error')
        import traceback
        self.__teePrintOrNot(traceback.format_exc(),'error')
        self.deSynced = True
    return self
776
+
777
def checkExternalChanges(self):
    """
    Compare the recorded file update time with the one currently on disk.

    Does nothing if the instance is already marked as deSynced. A newer time on disk
    marks the instance as deSynced, an older time on disk resets the recorded time.

    Returns:
    TSVZed: self
    """
    if not self.deSynced:
        timeOnDisk = getFileUpdateTimeNs(self._fileName)
        recordedTime = self.externalFileUpdateTime
        if recordedTime < timeOnDisk:
            self.deSynced = True
            self.__teePrintOrNot(f"External changes detected in {self._fileName}")
        elif recordedTime > timeOnDisk:
            self.__teePrintOrNot(f"Time anomalies detected in {self._fileName}, resetting externalFileUpdateTime")
            self.externalFileUpdateTime = timeOnDisk
    return self
590
788
 
591
789
  def _appendWorker(self):
592
790
  while not self.shutdownEvent.is_set():
791
+ self.checkExternalChanges()
593
792
  self.rewrite()
594
793
  self.commitAppendToFile()
595
794
  time.sleep(self.append_check_delay)
@@ -601,6 +800,11 @@ memoryOnly:{self.memoryOnly}
601
800
 
602
801
  def commitAppendToFile(self):
603
802
  if self.appendQueue:
803
+ if self.memoryOnly:
804
+ self.appendQueue.clear()
805
+ if self.verbose:
806
+ self.__teePrintOrNot(f"Memory only mode. Append queue cleared.")
807
+ return self
604
808
  try:
605
809
  if self.verbose:
606
810
  self.__teePrintOrNot(f"Commiting {len(self.appendQueue)} records to {self._fileName}")
@@ -614,6 +818,7 @@ memoryOnly:{self.memoryOnly}
614
818
  self.__teePrintOrNot(f"Records commited to {self._fileName}")
615
819
  self.__teePrintOrNot(f"After size of {self._fileName}: {os.path.getsize(self._fileName)}")
616
820
  except Exception as e:
821
+ self.release_file_obj(file)
617
822
  self.__teePrintOrNot(f"Failed to write at commitAppendToFile to {self._fileName}: {e}",'error')
618
823
  import traceback
619
824
  self.__teePrintOrNot(traceback.format_exc(),'error')
@@ -648,12 +853,20 @@ memoryOnly:{self.memoryOnly}
648
853
  if self.verbose:
649
854
  self.__teePrintOrNot(f"File {self._fileName} locked with mode {modes}")
650
855
  except Exception as e:
651
- self.writeLock.release() # Release the thread lock in case of an error
652
- raise e # Re-raise the exception to handle it outside or notify the user
856
+ try:
857
+ self.writeLock.release() # Release the thread lock in case of an error
858
+ except Exception as e:
859
+ self.__teePrintOrNot(f"Failed to release writeLock for {self._fileName}: {e}",'error')
860
+ self.__teePrintOrNot(f"Failed to open file {self._fileName}: {e}",'error')
653
861
  return file
654
862
 
655
863
  def release_file_obj(self,file):
864
+ # if write lock is already released, return
865
+ if not self.writeLock.locked():
866
+ return
656
867
  try:
868
+ file.flush() # Ensure the file is flushed before unlocking
869
+ os.fsync(file.fileno()) # Ensure the file is synced to disk before unlocking
657
870
  if os.name == 'posix':
658
871
  fcntl.lockf(file, fcntl.LOCK_UN)
659
872
  elif os.name == 'nt':
@@ -664,6 +877,49 @@ memoryOnly:{self.memoryOnly}
664
877
  if self.verbose:
665
878
  self.__teePrintOrNot(f"File {file.name} unlocked / released")
666
879
  except Exception as e:
667
- raise e # Re-raise the exception for external handling
880
+ self.__teePrintOrNot(f"Failed to release file {file.name}: {e}",'error')
668
881
  finally:
669
- self.writeLock.release() # Ensure the thread lock is always released
882
+ try:
883
+ self.writeLock.release() # Ensure the thread lock is always released
884
+ except Exception as e:
885
+ self.__teePrintOrNot(f"Failed to release writeLock for {file.name}: {e}",'error')
886
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
887
+
888
+
889
def __main__():
    """
    Command line entry point: read, append to, delete from or clear a TSV file.

    The arguments are taken from the command line, see the argparse definitions below.
    'delete' is performed by appending a line that only contains the key.
    """
    import argparse
    parser = argparse.ArgumentParser(description='TSVZed: A TSV file manager')
    parser.add_argument('filename', type=str, help='The TSV file to read')
    parser.add_argument('operation', type=str,nargs='?', choices=['read','append','delete','clear'], help='The operation to perform. Default: read', default='read')
    parser.add_argument('line', type=str, nargs='*', help='The line to append to the TSV file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
    parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the TSV file. seperate using \\t')
    parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
    parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
    parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
    args = parser.parse_args()

    # the header is given with literal "\t" sequences on the command line
    header = args.header.replace('\\t','\t') if args.header else ''

    # without a key there is nothing meaningful to append / delete, only a blank record would be written
    if args.operation in ('append','delete') and not args.line:
        parser.error(f"operation '{args.operation}' requires at least a key")

    # argparse restricts the operation to the four choices handled here
    if args.operation == 'read':
        # check if the file exist
        if not os.path.isfile(args.filename):
            print(f"File not found: {args.filename}")
            return
        # read the file
        data = readTSV(args.filename, verifyHeader = False, verbose=args.verbose,strict= not args.force)
        table = pretty_format_table(data.values())
        # pretty_format_table returns None when there is no data, do not print the literal "None"
        if table:
            print(table)
    elif args.operation == 'append':
        appendTSV(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force)
    elif args.operation == 'delete':
        # a line containing only the key marks the key as deleted
        appendTSV(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force)
    elif args.operation == 'clear':
        clearTSV(args.filename, header=header, verbose=args.verbose, verifyHeader=not args.force)
920
+
921
+ if __name__ == '__main__':
922
+ __main__()
923
+
924
+
925
+
@@ -1,17 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: TSVZ
3
- Version: 2.57
4
- Summary: An simple in memory wrapper around a TSV file to function as a database
5
- Home-page: https://github.com/yufei-pan/TSVZ
6
- Author: Yufei Pan
7
- Author-email: pan@zopyr.us
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.6
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
-
15
- This lib provides some helper funtions to interact with tsv ( tab seperated values ) files.
16
-
17
- TSVZ can also funtion like an in memory DB that is able to perform non-blocking read / write to TSV files.
@@ -1,6 +0,0 @@
1
- TSVZ.py,sha256=pLi2pADqECVnafOzl8tZSQnytATuCdYUgZwwqm7vIm4,29673
2
- TSVZ-2.57.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- TSVZ-2.57.dist-info/METADATA,sha256=UudrakeV_BECAS6aFeEZ8n5HBvyc0aFlKiPYzNdRB_E,703
4
- TSVZ-2.57.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
5
- TSVZ-2.57.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
- TSVZ-2.57.dist-info/RECORD,,
File without changes