TSVZ 3.2__py3-none-any.whl → 3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: TSVZ
3
- Version: 3.2
3
+ Version: 3.11
4
4
  Summary: A simple in-memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -0,0 +1,6 @@
1
+ TSVZ.py,sha256=FWLiXvU0sQhDr7yCNl5gGM4eYqMXV7QfnA6ZgWQ3aY0,64806
2
+ TSVZ-3.11.dist-info/METADATA,sha256=wTmWgGYLCDIhLsS7Rm1IIllV44vAe3EYIKFPJQExwbg,1826
3
+ TSVZ-3.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ TSVZ-3.11.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
+ TSVZ-3.11.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
+ TSVZ-3.11.dist-info/RECORD,,
TSVZ.py CHANGED
@@ -6,15 +6,22 @@ import atexit
6
6
  import threading
7
7
  import re
8
8
 
9
+ RESOURCE_LIB_AVAILABLE = True
10
+ try:
11
+ import resource
12
+ except:
13
+ RESOURCE_LIB_AVAILABLE = False
14
+
9
15
  if os.name == 'nt':
10
16
  import msvcrt
11
17
  elif os.name == 'posix':
12
18
  import fcntl
13
19
 
14
- version = '3.02'
20
+ version = '3.11'
15
21
  author = 'pan@zopyr.us'
16
22
 
17
23
  DEFAULT_DELIMITER = '\t'
24
+ DEFAULTS_INDICATOR_KEY = '#_defaults_#'
18
25
 
19
26
  def get_delimiter(delimiter,file_name = ''):
20
27
  if not delimiter:
@@ -89,6 +96,125 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER):
89
96
  outTable.append(row_format.format(*row))
90
97
  return '\n'.join(outTable) + '\n'
91
98
 
99
def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_format='.2f'):
    """
    Format a byte count as a human-readable string, or parse such a string back to an int.

    The returned string carries only the magnitude prefix (e.g. 'Ki', 'M');
    callers append the trailing 'B' themselves.

    Args:
        size (int, float or str): The size in bytes, or a string such as '1.5 GiB'.
        use_1024_bytes (bool, optional): Use 1024 as the base instead of 1000.
            When None, formatting uses 1024; parsing uses 1024 only if the
            unit contains an 'i' (e.g. 'GiB') and 1000 otherwise.
        to_int (bool, optional): Force conversion to an integer byte count. Default is False.
        to_str (bool, optional): Force conversion to a string representation. Default is False.
        str_format (str, optional): Format spec applied to the scaled number. Default is '.2f'.

    Returns:
        int or str: An integer byte count when parsing (0 on invalid input),
        otherwise a string of the form '<number> <prefix>'.

    Examples:
        >>> format_bytes(1500)
        '1.46 Ki'
        >>> format_bytes(1500, use_1024_bytes=False)
        '1.50 K'
        >>> format_bytes('1.5 GiB', to_int=True)
        1610612736
    """
    if to_int or isinstance(size, str):
        if isinstance(size, int):
            return size
        if isinstance(size, str):
            # Split the numeric part from the unit, handling optional whitespace
            match = re.match(r"(\d+(\.\d+)?)\s*([a-zA-Z]*)", size)
            if not match:
                print("Invalid size format. Expected format: 'number [unit]', e.g., '1.5 GiB' or '1.5GiB'")
                print(f"Got: {size}")
                return 0
            number, _, unit = match.groups()
            unit = unit.strip().lower().rstrip('b')
            if unit.endswith('i'):
                # an IEC unit ('KiB', 'MiB', ...) always means base 1024
                use_1024_bytes = True
            elif use_1024_bytes is None:
                use_1024_bytes = False
            unit = unit.rstrip('i')
            base = 2**10 if use_1024_bytes else 10**3
            unit_labels = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
            if unit not in unit_labels:
                print(f"Invalid unit '{unit}'. Expected one of {list(unit_labels.keys())}")
                return 0
            return int(float(number) * (base ** unit_labels[unit]))
        # to_int was requested for something that is neither int nor str
        try:
            return int(size)
        except Exception:
            return 0
    if to_str or isinstance(size, (int, float)):
        if use_1024_bytes or use_1024_bytes is None:
            base = 2**10
            prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi')
        else:
            base = 10**3
            prefixes = ('', 'K', 'M', 'G', 'T', 'P')
        index = 0
        # Stop at the largest known prefix so huge values do not run off the
        # end of the table; '>=' makes exactly one base unit scale up
        # (1024 -> '1.00 Ki' rather than '1024.00 ').
        while size >= base and index < len(prefixes) - 1:
            size /= base
            index += 1
        return f"{size:{str_format}} {prefixes[index]}"
    # Anything else (e.g. Decimal): coerce to float and format that.
    try:
        return format_bytes(float(size), use_1024_bytes, str_format=str_format)
    except Exception as e:
        import traceback
        print(f"Error: {e}")
        print(traceback.format_exc())
        print(f"Invalid size: {size}")
        return 0
187
+
188
def get_resource_usage(return_dict = False):
    """
    Report the resource usage of the current process via resource.getrusage.

    Args:
        return_dict (bool, optional): Return the values as a dict instead of
            a tab/newline separated string. Default is False.

    Returns:
        dict or str: Human-readable usage figures keyed by description. An
        empty dict / empty string is returned when the `resource` module is
        not available or when querying it fails.
    """
    empty_result = {} if return_dict else ''
    if not RESOURCE_LIB_AVAILABLE:
        # `resource` could not be imported on this platform, so there is
        # nothing to report; return quietly instead of failing below.
        return empty_result
    try:
        rawResource = resource.getrusage(resource.RUSAGE_SELF)
        # NOTE(review): the memory figures are multiplied by 1024, i.e. treated
        # as kilobytes. That matches Linux; macOS reports ru_maxrss in bytes —
        # confirm whether that platform needs separate handling.
        resourceDict = {
            'user mode time': f'{rawResource.ru_utime} seconds',
            'system mode time': f'{rawResource.ru_stime} seconds',
            'max resident set size': f'{format_bytes(rawResource.ru_maxrss * 1024)}B',
            'shared memory size': f'{format_bytes(rawResource.ru_ixrss * 1024)}B',
            'unshared memory size': f'{format_bytes(rawResource.ru_idrss * 1024)}B',
            'unshared stack size': f'{format_bytes(rawResource.ru_isrss * 1024)}B',
            'cached page hits': f'{rawResource.ru_minflt}',
            'missed page hits': f'{rawResource.ru_majflt}',
            'swapped out page count': f'{rawResource.ru_nswap}',
            'block input operations': f'{rawResource.ru_inblock}',
            'block output operations': f'{rawResource.ru_oublock}',
            'IPC messages sent': f'{rawResource.ru_msgsnd}',
            'IPC messages received': f'{rawResource.ru_msgrcv}',
            'signals received': f'{rawResource.ru_nsignals}',
            'voluntary context sw': f'{rawResource.ru_nvcsw}',
            'involuntary context sw': f'{rawResource.ru_nivcsw}',
        }
        if return_dict:
            return resourceDict
        return '\n'.join(['\t'.join(line) for line in resourceDict.items()])
    except Exception as e:
        # best effort: usage reporting must never break the caller
        print(f"Error: {e}")
        return empty_result
217
+
92
218
  def __teePrintOrNot(message,level = 'info',teeLogger = None):
93
219
  """
94
220
  Prints the given message or logs it using the provided teeLogger.
@@ -109,7 +235,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
109
235
  except Exception as e:
110
236
  print(message,flush=True)
111
237
 
112
- def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER):
238
+ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = []):
113
239
  """
114
240
  Process a line of text and update the task dictionary.
115
241
 
@@ -120,6 +246,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
120
246
  verbose (bool, optional): Whether to print verbose output. Defaults to False.
121
247
  teeLogger (object, optional): The tee logger object for printing output. Defaults to None.
122
248
  strict (bool, optional): Whether to strictly enforce the correct number of columns. Defaults to True.
249
+ defaults (list, optional): The default values to use for missing columns. Defaults to [].
123
250
 
124
251
  Returns:
125
252
  tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
@@ -131,36 +258,40 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
131
258
  if verbose:
132
259
  __teePrintOrNot(f"Ignoring empty line: {line}",teeLogger=teeLogger)
133
260
  return correctColumnNum , []
134
- if line.startswith('#'):
261
+ if line.startswith('#') and not line.startswith(DEFAULTS_INDICATOR_KEY):
135
262
  if verbose:
136
263
  __teePrintOrNot(f"Ignoring comment line: {line}",teeLogger=teeLogger)
137
264
  return correctColumnNum , []
138
265
  # we only interested in the lines that have the correct number of columns
139
- lineCache = [segment.strip() for segment in line.split(delimiter)]
266
+ lineCache = [segment.rstrip() for segment in line.split(delimiter)]
140
267
  if not lineCache:
141
268
  return correctColumnNum , []
142
269
  if correctColumnNum == -1:
270
+ if defaults and len(defaults) > 1:
271
+ correctColumnNum = len(defaults)
272
+ else:
273
+ correctColumnNum = len(lineCache)
143
274
  if verbose:
144
275
  __teePrintOrNot(f"detected correctColumnNum: {len(lineCache)}",teeLogger=teeLogger)
145
- correctColumnNum = len(lineCache)
146
276
  if not lineCache[0]:
147
277
  if verbose:
148
278
  __teePrintOrNot(f"Ignoring line with empty key: {line}",teeLogger=teeLogger)
149
279
  return correctColumnNum , []
150
280
  if len(lineCache) == 1 or not any(lineCache[1:]):
151
- if correctColumnNum == 1: taskDic[lineCache[0]] = lineCache
281
+ if correctColumnNum == 1:
282
+ taskDic[lineCache[0]] = lineCache
283
+ elif lineCache[0] == DEFAULTS_INDICATOR_KEY:
284
+ if verbose:
285
+ __teePrintOrNot(f"Empty defaults line found: {line}",teeLogger=teeLogger)
286
+ defaults = []
152
287
  else:
153
288
  if verbose:
154
289
  __teePrintOrNot(f"Key {lineCache[0]} found with empty value, deleting such key's representaion",teeLogger=teeLogger)
155
290
  if lineCache[0] in taskDic:
156
291
  del taskDic[lineCache[0]]
157
292
  return correctColumnNum , []
158
- elif len(lineCache) == correctColumnNum:
159
- taskDic[lineCache[0]] = lineCache
160
- if verbose:
161
- __teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
162
- else:
163
- if strict:
293
+ elif len(lineCache) != correctColumnNum:
294
+ if strict and not any(defaults):
164
295
  if verbose:
165
296
  __teePrintOrNot(f"Ignoring line with {len(lineCache)} columns: {line}",teeLogger=teeLogger)
166
297
  return correctColumnNum , []
@@ -170,12 +301,26 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
170
301
  lineCache += ['']*(correctColumnNum-len(lineCache))
171
302
  elif len(lineCache) > correctColumnNum:
172
303
  lineCache = lineCache[:correctColumnNum]
173
- taskDic[lineCache[0]] = lineCache
174
304
  if verbose:
175
- __teePrintOrNot(f"Key {lineCache[0]} added after correction",teeLogger=teeLogger)
305
+ __teePrintOrNot(f"Correcting {lineCache[0]}",teeLogger=teeLogger)
306
+ # now replace empty values with defaults
307
+ if defaults and len(defaults) > 1:
308
+ for i in range(1,len(lineCache)):
309
+ if not lineCache[i] and i < len(defaults) and defaults[i]:
310
+ lineCache[i] = defaults[i]
311
+ if verbose:
312
+ __teePrintOrNot(f"Replacing empty value at {i} with default: {defaults[i]}",teeLogger=teeLogger)
313
+ if lineCache[0] == DEFAULTS_INDICATOR_KEY:
314
+ if verbose:
315
+ __teePrintOrNot(f"Defaults line found: {line}",teeLogger=teeLogger)
316
+ defaults = lineCache
317
+ return correctColumnNum , []
318
+ taskDic[lineCache[0]] = lineCache
319
+ if verbose:
320
+ __teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
176
321
  return correctColumnNum, lineCache
177
322
 
178
- def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...):
323
+ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = []):
179
324
  """
180
325
  Reads the last valid line from a file.
181
326
 
@@ -187,6 +332,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
187
332
  teeLogger (optional): Logger to use for tee print. Defaults to None.
188
333
  encoding (str, optional): The encoding of the file. Defaults to None.
189
334
  strict (bool, optional): Whether to enforce strict processing. Defaults to False.
335
+ delimiter (str, optional): The delimiter used in the file. Defaults to None.
336
+ defaults (list, optional): The default values to use for missing columns. Defaults to [].
190
337
 
191
338
  Returns:
192
339
  list: The last valid line data processed by processLine, or an empty list if none found.
@@ -220,13 +367,14 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
220
367
  if lines[i].strip(): # Skip empty lines
221
368
  # Process the line
222
369
  correctColumnNum, lineCache = _processLine(
223
- lines[i].decode(encoding=encoding),
224
- taskDic,
225
- correctColumnNum,
370
+ line=lines[i].decode(encoding=encoding),
371
+ taskDic=taskDic,
372
+ correctColumnNum=correctColumnNum,
226
373
  verbose=verbose,
227
374
  teeLogger=teeLogger,
228
375
  strict=strict,
229
- delimiter=delimiter
376
+ delimiter=delimiter,
377
+ defaults=defaults,
230
378
  )
231
379
  # If the line is valid, return it
232
380
  if lineCache and any(lineCache):
@@ -327,7 +475,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
327
475
  return False
328
476
  return True
329
477
 
330
- def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t'):
478
+ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = []):
331
479
  """
332
480
  Compatibility method, calls readTabularFile.
333
481
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
@@ -344,6 +492,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
344
492
  - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
345
493
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
346
494
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t'.
495
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
347
496
 
348
497
  Returns:
349
498
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -352,9 +501,9 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
352
501
  - Exception: If the file is not found or there is a data format error.
353
502
 
354
503
  """
355
- return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter)
504
+ return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
356
505
 
357
- def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...):
506
+ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = []):
358
507
  """
359
508
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
360
509
 
@@ -370,6 +519,7 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
370
519
  - encoding (str, optional): The encoding of the file. Defaults to 'utf8'.
371
520
  - strict (bool, optional): Whether to raise an exception if there is a data format error. Defaults to True.
372
521
  - delimiter (str, optional): The delimiter used in the Tabular file. Defaults to '\t' for TSV, ',' for CSV, '\0' for NSV.
522
+ - defaults (list, optional): The default values to use for missing columns. Defaults to [].
373
523
 
374
524
  Returns:
375
525
  - OrderedDict: The dictionary containing the data from the Tabular file.
@@ -394,12 +544,12 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
394
544
  if verbose:
395
545
  __teePrintOrNot(f"correctColumnNum: {correctColumnNum}",teeLogger=teeLogger)
396
546
  if lastLineOnly:
397
- lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter)
547
+ lineCache = read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=verbose, teeLogger=teeLogger, strict=strict, delimiter=delimiter, defaults=defaults)
398
548
  if lineCache:
399
549
  taskDic[lineCache[0]] = lineCache
400
550
  return lineCache
401
551
  for line in file:
402
- correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter)
552
+ correctColumnNum, lineCache = _processLine(line.decode(encoding=encoding),taskDic,correctColumnNum,verbose = verbose,teeLogger = teeLogger,strict = strict,delimiter=delimiter,defaults = defaults)
403
553
  return taskDic
404
554
 
405
555
  def appendTSV(fileName,lineToAppend,teeLogger = None,header = '',createIfNotExist = False,verifyHeader = True,verbose = False,encoding = 'utf8', strict = True, delimiter = '\t'):
@@ -446,7 +596,7 @@ def appendTabularFile(fileName,lineToAppend,teeLogger = None,header = '',createI
446
596
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
447
597
  return
448
598
  if type(lineToAppend) == str:
449
- lineToAppend = lineToAppend.strip().split(delimiter)
599
+ lineToAppend = lineToAppend.split(delimiter)
450
600
  else:
451
601
  for i in range(len(lineToAppend)):
452
602
  if type(lineToAppend[i]) != str:
@@ -548,14 +698,19 @@ class TSVZed(OrderedDict):
548
698
  except Exception as e:
549
699
  print(message,flush=True)
550
700
 
551
- def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...):
701
def getResourseUsage(self,return_dict = False):
    """
    Return the current process's resource usage.

    Delegates to the module-level get_resource_usage; `return_dict` is
    passed through unchanged.
    """
    usage = get_resource_usage(return_dict = return_dict)
    return usage
703
+
704
+ def __init__ (self,fileName,teeLogger = None,header = '',createIfNotExist = True,verifyHeader = True,rewrite_on_load = True,rewrite_on_exit = False,rewrite_interval = 0, append_check_delay = 0.01,monitor_external_changes = True,verbose = False,encoding = 'utf8',delimiter = ...,defualts = [],strict = False):
552
705
  super().__init__()
553
706
  self.version = version
707
+ self.strict = strict
554
708
  self.externalFileUpdateTime = getFileUpdateTimeNs(fileName)
555
709
  self.lastUpdateTime = self.externalFileUpdateTime
556
710
  self._fileName = fileName
557
711
  self.teeLogger = teeLogger
558
712
  self.delimiter = get_delimiter(delimiter,file_name=fileName)
713
+ self.defaults = defualts
559
714
  self.header = _formatHeader(header,verbose = verbose,teeLogger = self.teeLogger,delimiter=self.delimiter)
560
715
  self.correctColumnNum = -1
561
716
  self.createIfNotExist = createIfNotExist
@@ -584,6 +739,27 @@ class TSVZed(OrderedDict):
584
739
  self.load()
585
740
  atexit.register(self.stopAppendThread)
586
741
 
742
def setDefaults(self,defaults):
    """
    Set the default column values used to fill in empty fields.

    Args:
        defaults (str or iterable): The default values. A string is split on
            self.delimiter; any other non-list value is converted with list().
            The first element is the key slot: DEFAULTS_INDICATOR_KEY is
            prepended when it is not already there.

    An empty, all-empty, or unconvertible value clears self.defaults.
    """
    if not defaults:
        self.defaults = []
        return
    if isinstance(defaults,str):
        defaults = defaults.split(self.delimiter)
    elif not isinstance(defaults,list):
        try:
            defaults = list(defaults)
        except Exception:
            if self.verbose:
                self.__teePrintOrNot('Invalid defaults, setting defaults to empty.','error')
            self.defaults = []
            return
    if not any(defaults):
        # every field is empty, so there is nothing to default to
        self.defaults = []
        return
    if defaults[0] != DEFAULTS_INDICATOR_KEY:
        defaults = [DEFAULTS_INDICATOR_KEY]+defaults
    self.defaults = defaults
762
+
587
763
  def load(self):
588
764
  self.reload()
589
765
  if self.rewrite_on_load:
@@ -597,7 +773,7 @@ class TSVZed(OrderedDict):
597
773
  if self.verbose:
598
774
  self.__teePrintOrNot(f"Loading {self._fileName}")
599
775
  super().clear()
600
- readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = False, delimiter = self.delimiter)
776
+ readTabularFile(self._fileName, teeLogger = self.teeLogger, header = self.header, createIfNotExist = self.createIfNotExist, verifyHeader = self.verifyHeader, verbose = self.verbose, taskDic = self,encoding = self.encoding if self.encoding else None, strict = self.strict, delimiter = self.delimiter, defaults=self.defaults)
601
777
  if self.verbose:
602
778
  self.__teePrintOrNot(f"Loaded {len(self)} records from {self._fileName}")
603
779
  self.correctColumnNum = len(self.header.split(self.delimiter)) if (self.header and self.verifyHeader) else (len(self[next(iter(self))]) if self else -1)
@@ -612,30 +788,55 @@ class TSVZed(OrderedDict):
612
788
  return self
613
789
 
614
790
  def __setitem__(self,key,value):
615
- key = str(key).strip()
791
+ key = str(key).rstrip()
616
792
  if not key:
617
793
  self.__teePrintOrNot('Key cannot be empty','error')
618
794
  return
619
795
  if type(value) == str:
620
- value = value.strip().split(self.delimiter)
796
+ value = value.split(self.delimiter)
621
797
  # sanitize the value
622
- value = [(str(segment).strip() if type(segment) != str else segment.strip()) if segment else '' for segment in value]
623
- #value = list(map(lambda segment: str(segment).strip(), value))
798
+ value = [(str(segment).rstrip() if type(segment) != str else segment.rstrip()) if segment else '' for segment in value]
799
+ # escape the delimiter and newline characters
800
+ value = [segment.replace(self.delimiter,'<sep>').replace('\n','\\n') for segment in value]
624
801
  # the first field in value should be the key
625
802
  # add it if it is not there
626
803
  if not value or value[0] != key:
627
804
  value = [key]+value
628
805
  # verify the value has the correct number of columns
629
806
  if self.correctColumnNum != 1 and len(value) == 1:
630
- # this means we want to clear / deelte the key
807
+ # this means we want to clear / delete the key
631
808
  self.__delitem__(key)
632
809
  elif self.correctColumnNum > 0:
633
- assert len(value) == self.correctColumnNum, f"Data format error! Expected {self.correctColumnNum} columns, but got {len(value) } columns"
810
+ if len(value) != self.correctColumnNum:
811
+ if self.strict:
812
+ self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}. Refuse adding key...",'error')
813
+ return
814
+ elif self.verbose:
815
+ self.__teePrintOrNot(f"Value {value} does not have the correct number of columns: {self.correctColumnNum}, correcting...",'warning')
816
+ if len(value) < self.correctColumnNum:
817
+ value += ['']*(self.correctColumnNum-len(value))
818
+ elif len(value) > self.correctColumnNum:
819
+ value = value[:self.correctColumnNum]
634
820
  else:
635
821
  self.correctColumnNum = len(value)
822
+ if self.defaults and len(self.defaults) > 1:
823
+ for i in range(1,len(value)):
824
+ if not value[i] and i < len(self.defaults) and self.defaults[i]:
825
+ value[i] = self.defaults[i]
826
+ if self.verbose:
827
+ self.__teePrintOrNot(f" Replacing empty value at {i} with default: {self.defaults[i]}")
828
+ if key == DEFAULTS_INDICATOR_KEY:
829
+ self.defaults = value
830
+ if self.verbose:
831
+ self.__teePrintOrNot(f"Defaults set to {value}")
832
+ if not self.memoryOnly:
833
+ self.appendQueue.append(self.delimiter.join(value))
834
+ self.lastUpdateTime = get_time_ns()
835
+ if self.verbose:
836
+ self.__teePrintOrNot(f"Appending Defaults {key} to the appendQueue")
837
+ return
636
838
  if self.verbose:
637
839
  self.__teePrintOrNot(f"Setting {key} to {value}")
638
-
639
840
  if key in self:
640
841
  if self[key] == value:
641
842
  if self.verbose:
@@ -644,9 +845,13 @@ class TSVZed(OrderedDict):
644
845
  self.dirty = True
645
846
  # update the dictionary,
646
847
  super().__setitem__(key,value)
647
- if self.verbose:
648
- self.__teePrintOrNot(f"Key {key} updated")
649
848
  if self.memoryOnly:
849
+ if self.verbose:
850
+ self.__teePrintOrNot(f"Key {key} updated in memory only")
851
+ return
852
+ elif key.startswith('#'):
853
+ if self.verbose:
854
+ self.__teePrintOrNot(f"Key {key} updated in memory only as it starts with #")
650
855
  return
651
856
  if self.verbose:
652
857
  self.__teePrintOrNot(f"Appending {key} to the appendQueue")
@@ -659,16 +864,29 @@ class TSVZed(OrderedDict):
659
864
 
660
865
 
661
866
  def __delitem__(self,key):
662
- key = str(key).strip()
867
+ key = str(key).rstrip()
868
+ if key == DEFAULTS_INDICATOR_KEY:
869
+ self.defaults = []
870
+ if self.verbose:
871
+ self.__teePrintOrNot(f"Defaults cleared")
872
+ if not self.memoryOnly:
873
+ self.__appendEmptyLine(key)
874
+ if self.verbose:
875
+ self.__teePrintOrNot(f"Appending empty default line {key}")
876
+ return
663
877
  # delete the key from the dictionary and update the file
664
878
  if key not in self:
665
879
  if self.verbose:
666
880
  self.__teePrintOrNot(f"Key {key} not found")
667
881
  return
668
882
  super().__delitem__(key)
669
- if self.memoryOnly:
883
+ if self.memoryOnly or key.startswith('#'):
884
+ if self.verbose:
885
+ self.__teePrintOrNot(f"Key {key} deleted in memory")
670
886
  return
671
887
  self.__appendEmptyLine(key)
888
+ if self.verbose:
889
+ self.__teePrintOrNot(f"Appending empty line {key}")
672
890
  self.lastUpdateTime = get_time_ns()
673
891
 
674
892
  def __appendEmptyLine(self,key):
@@ -868,30 +1086,35 @@ memoryOnly:{self.memoryOnly}
868
1086
  return self
869
1087
 
870
1088
  def mapToFile(self):
1089
+ mec = self.monitor_external_changes
1090
+ self.monitor_external_changes = False
871
1091
  try:
872
1092
  if (not self.monitor_external_changes) and self.externalFileUpdateTime < getFileUpdateTimeNs(self._fileName):
873
1093
  self.__teePrintOrNot(f"Warning: Overwriting external changes in {self._fileName}",'warning')
874
1094
  file = self.get_file_obj('r+b')
875
1095
  overWrite = False
876
- line = file.readline().decode(self.encoding)
877
- aftPos = file.tell()
878
- if self.header and not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = False):
879
- file.seek(0)
880
- file.write(f'{self.header}\n'.encode(encoding=self.encoding))
881
- # if the header is not the same length as the line, we need to overwrite the file
882
- if aftPos != file.tell():
883
- overWrite = True
884
- if self.verbose:
885
- self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
1096
+ if self.header:
1097
+ line = file.readline().decode(self.encoding)
1098
+ aftPos = file.tell()
1099
+ if not _lineContainHeader(self.header,line,verbose = self.verbose,teeLogger = self.teeLogger,strict = self.strict):
1100
+ file.seek(0)
1101
+ file.write(f'{self.header}\n'.encode(encoding=self.encoding))
1102
+ # if the header is not the same length as the line, we need to overwrite the file
1103
+ if aftPos != file.tell():
1104
+ overWrite = True
1105
+ if self.verbose:
1106
+ self.__teePrintOrNot(f"Header {self.header} written to {self._fileName}")
886
1107
  for value in self.values():
887
- strToWrite = self.delimiter.join(value)+'\n'
1108
+ if value[0].startswith('#'):
1109
+ continue
1110
+ strToWrite = self.delimiter.join(value)
888
1111
  if overWrite:
889
1112
  if self.verbose:
890
1113
  self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
891
- file.write(strToWrite.encode(encoding=self.encoding))
1114
+ file.write(strToWrite.encode(encoding=self.encoding)+b'\n')
892
1115
  continue
893
1116
  pos = file.tell()
894
- line = file.readline().decode(encoding=self.encoding)
1117
+ line = file.readline()
895
1118
  aftPos = file.tell()
896
1119
  if not line or pos == aftPos:
897
1120
  if self.verbose:
@@ -899,13 +1122,14 @@ memoryOnly:{self.memoryOnly}
899
1122
  file.write(strToWrite.encode(encoding=self.encoding))
900
1123
  overWrite = True
901
1124
  continue
1125
+ strToWrite = strToWrite.encode(encoding=self.encoding).ljust(len(line)-1)+b'\n'
902
1126
  if line != strToWrite:
903
1127
  if self.verbose:
904
- self.__teePrintOrNot(f"Overwriting {value} to {self._fileName}")
1128
+ self.__teePrintOrNot(f"Modifing {value} to {self._fileName}")
905
1129
  file.seek(pos)
906
1130
  # fill the string with space to write to the correct length
907
1131
  #file.write(strToWrite.rstrip('\n').ljust(len(line)-1)+'\n')
908
- file.write(strToWrite.encode(encoding=self.encoding).rstrip(b'\n').ljust(len(line)-1)+b'\n')
1132
+ file.write(strToWrite)
909
1133
  if aftPos != file.tell():
910
1134
  overWrite = True
911
1135
  file.truncate()
@@ -921,6 +1145,8 @@ memoryOnly:{self.memoryOnly}
921
1145
  import traceback
922
1146
  self.__teePrintOrNot(traceback.format_exc(),'error')
923
1147
  self.deSynced = True
1148
+ self.externalFileUpdateTime = getFileUpdateTimeNs(self._fileName)
1149
+ self.monitor_external_changes = mec
924
1150
  return self
925
1151
 
926
1152
  def checkExternalChanges(self):
@@ -1062,7 +1288,10 @@ def __main__():
1062
1288
  parser.add_argument('line', type=str, nargs='*', help='The line to append to the Tabular file. it follows as : {key} {value1} {value2} ... if a key without value be inserted, the value will get deleted.')
1063
1289
  parser.add_argument('-d', '--delimiter', type=str, help='The delimiter of the Tabular file. Default: Infer from last part of filename, or tab if cannot determine. Note: accept unicode escaped char, raw char, or string "comma,tab,null" will refer to their characters. ', default=...)
1064
1290
  parser.add_argument('-c', '--header', type=str, help='Perform checks with this header of the Tabular file. seperate using --delimiter.')
1065
- parser.add_argument('-f', '--force', action='store_true', help='Force the operation. Ignore checks for column numbers / headers')
1291
+ parser.add_argument('--defaults', type=str, help='Default values to fill in the missing columns. seperate using --delimiter. Ex. if -d = comma, --defaults="key,value1,value2..." Note: Please specify the key. But it will not be used as a key need to be unique in data.')
1292
+ strictMode = parser.add_mutually_exclusive_group()
1293
+ strictMode.add_argument('-s', '--strict', dest = 'strict',action='store_true', help='Strict mode. Do not parse values that seems malformed, check for column numbers / headers')
1294
+ strictMode.add_argument('-f', '--force', dest = 'strict',action='store_false', help='Force the operation. Ignore checks for column numbers / headers')
1066
1295
  parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose output')
1067
1296
  parser.add_argument('-V', '--version', action='version', version=f'%(prog)s {version} by {author}')
1068
1297
  args = parser.parse_args()
@@ -1074,6 +1303,13 @@ def __main__():
1074
1303
  except Exception as e:
1075
1304
  print(f"Failed to decode header: {args.header}")
1076
1305
  header = ''
1306
+ defaults = []
1307
+ if args.defaults:
1308
+ try:
1309
+ defaults = args.defaults.encode().decode('unicode_escape').split(args.delimiter)
1310
+ except Exception as e:
1311
+ print(f"Failed to decode defaults: {args.defaults}")
1312
+ defaults = []
1077
1313
 
1078
1314
  if args.operation == 'read':
1079
1315
  # check if the file exist
@@ -1081,14 +1317,14 @@ def __main__():
1081
1317
  print(f"File not found: {args.filename}")
1082
1318
  return
1083
1319
  # read the file
1084
- data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= not args.force, delimiter=args.delimiter)
1320
+ data = readTabularFile(args.filename, verifyHeader = False, verbose=args.verbose,strict= args.strict, delimiter=args.delimiter, defaults=defaults)
1085
1321
  print(pretty_format_table(data.values(),delimiter=args.delimiter))
1086
1322
  elif args.operation == 'append':
1087
- appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
1323
+ appendTabularFile(args.filename, args.line,createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1088
1324
  elif args.operation == 'delete':
1089
- appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= not args.force, delimiter=args.delimiter)
1325
+ appendTabularFile(args.filename, args.line[:1],createIfNotExist = True, header=header, verbose=args.verbose, strict= args.strict, delimiter=args.delimiter)
1090
1326
  elif args.operation == 'clear':
1091
- clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=not args.force, delimiter=args.delimiter)
1327
+ clearTabularFile(args.filename, header=header, verbose=args.verbose, verifyHeader=args.strict, delimiter=args.delimiter)
1092
1328
  else:
1093
1329
  print("Invalid operation")
1094
1330
  return
TSVZ-3.2.dist-info/RECORD DELETED
@@ -1,6 +0,0 @@
1
- TSVZ.py,sha256=l2y-iLRf0xFusH1aoAciHSBkix0P8k643A1ChU25dPA,54026
2
- TSVZ-3.2.dist-info/METADATA,sha256=p381Xig6aZj75lDC_D3Loa1F4cZa8PJSfALC9UrfrbA,1825
3
- TSVZ-3.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- TSVZ-3.2.dist-info/entry_points.txt,sha256=WeXidyV5yKCRLaVsnAY35xGa08QgytOfvr1CK9aescI,60
5
- TSVZ-3.2.dist-info/top_level.txt,sha256=OPx4LvOpaYykaos7oL_jGaObSWXxLzhHiWLuz-K147g,5
6
- TSVZ-3.2.dist-info/RECORD,,
File without changes