TSVZ 3.23__tar.gz → 3.25__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: TSVZ
3
- Version: 3.23
3
+ Version: 3.25
4
4
  Summary: A simple in-memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: TSVZ
3
- Version: 3.23
3
+ Version: 3.25
4
4
  Summary: A simple in-memory wrapper around a TSV file to function as a database
5
5
  Home-page: https://github.com/yufei-pan/TSVZ
6
6
  Author: Yufei Pan
@@ -22,7 +22,7 @@ if os.name == 'nt':
22
22
  elif os.name == 'posix':
23
23
  import fcntl
24
24
 
25
- version = '3.23'
25
+ version = '3.25'
26
26
  __version__ = version
27
27
  author = 'pan@zopyr.us'
28
28
 
@@ -128,6 +128,7 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
128
128
  def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_format='.2f'):
129
129
  """
130
130
  Format the size in bytes to a human-readable format or vice versa.
131
+ From hpcp: https://github.com/yufei-pan/hpcp
131
132
 
132
133
  Args:
133
134
  size (int or str): The size in bytes or a string representation of the size.
@@ -140,10 +141,16 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
140
141
  int or str: The formatted size based on the provided arguments.
141
142
 
142
143
  Examples:
143
- >>> format_bytes(1500)
144
- '1.50 KB'
144
+ >>> format_bytes(1500, use_1024_bytes=False)
145
+ '1.50 K'
145
146
  >>> format_bytes('1.5 GiB', to_int=True)
146
147
  1610612736
148
+ >>> format_bytes('1.5 GiB', to_str=True)
149
+ '1.50 Gi'
150
+ >>> format_bytes(1610612736, use_1024_bytes=True, to_str=True)
151
+ '1.50 Gi'
152
+ >>> format_bytes(1610612736, use_1024_bytes=False, to_str=True)
153
+ '1.61 G'
147
154
  """
148
155
  if to_int or isinstance(size, str):
149
156
  if isinstance(size, int):
@@ -152,6 +159,8 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
152
159
  # Use regular expression to split the numeric part from the unit, handling optional whitespace
153
160
  match = re.match(r"(\d+(\.\d+)?)\s*([a-zA-Z]*)", size)
154
161
  if not match:
162
+ if to_str:
163
+ return size
155
164
  print("Invalid size format. Expected format: 'number [unit]', e.g., '1.5 GiB' or '1.5GiB'")
156
165
  print(f"Got: {size}")
157
166
  return 0
@@ -171,8 +180,12 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
171
180
  power = 10**3
172
181
  unit_labels = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
173
182
  if unit not in unit_labels:
183
+ if to_str:
184
+ return size
174
185
  print(f"Invalid unit '{unit}'. Expected one of {list(unit_labels.keys())}")
175
186
  return 0
187
+ if to_str:
188
+ return format_bytes(size=int(number * (power ** unit_labels[unit])), use_1024_bytes=use_1024_bytes, to_str=True, str_format=str_format)
176
189
  # Calculate the bytes
177
190
  return int(number * (power ** unit_labels[unit]))
178
191
  else:
@@ -183,8 +196,8 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
183
196
  elif to_str or isinstance(size, int) or isinstance(size, float):
184
197
  if isinstance(size, str):
185
198
  try:
186
- size = size.lower().strip().rstrip('b')
187
- size = float(size)
199
+ size = size.rstrip('B').rstrip('b')
200
+ size = float(size.lower().strip())
188
201
  except Exception as e:
189
202
  return size
190
203
  # size is in bytes
@@ -195,7 +208,7 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
195
208
  while size > power:
196
209
  size /= power
197
210
  n += 1
198
- return f"{size:{str_format}} {power_labels[n]}"
211
+ return f"{size:{str_format}}{' '}{power_labels[n]}"
199
212
  else:
200
213
  power = 10**3
201
214
  n = 0
@@ -203,7 +216,7 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
203
216
  while size > power:
204
217
  size /= power
205
218
  n += 1
206
- return f"{size:{str_format}} {power_labels[n]}"
219
+ return f"{size:{str_format}}{' '}{power_labels[n]}"
207
220
  else:
208
221
  try:
209
222
  return format_bytes(float(size), use_1024_bytes)
@@ -267,7 +280,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
267
280
  except Exception:
268
281
  print(message,flush=True)
269
282
 
270
- def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = None):
283
+ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = ...):
271
284
  """
272
285
  Process a line of text and update the task dictionary.
273
286
 
@@ -284,7 +297,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
284
297
  tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
285
298
 
286
299
  """
287
- if not defaults:
300
+ if defaults is ...:
288
301
  defaults = []
289
302
  line = line.strip(' ').strip('\x00').rstrip('\r\n')
290
303
  # we throw away the lines that start with '#'
@@ -354,7 +367,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
354
367
  __teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
355
368
  return correctColumnNum, lineCache
356
369
 
357
- def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = []):
370
+ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = ...):
358
371
  """
359
372
  Reads the last valid line from a file.
360
373
 
@@ -374,6 +387,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
374
387
  """
375
388
  chunk_size = 1024 # Read in chunks of 1024 bytes
376
389
  last_valid_line = []
390
+ if defaults is ...:
391
+ defaults = []
377
392
  delimiter = get_delimiter(delimiter,file_name=fileName)
378
393
  if verbose:
379
394
  __teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
@@ -515,7 +530,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
515
530
  return False
516
531
  return True
517
532
 
518
- def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = []):
533
+ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...):
519
534
  """
520
535
  Compatibility method, calls readTabularFile.
521
536
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
@@ -543,7 +558,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
543
558
  """
544
559
  return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
545
560
 
546
- def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = []):
561
+ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...):
547
562
  """
548
563
  Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
549
564
 
@@ -570,6 +585,8 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
570
585
  """
571
586
  if taskDic is None:
572
587
  taskDic = {}
588
+ if defaults is ...:
589
+ defaults = []
573
590
  delimiter = get_delimiter(delimiter,file_name=fileName)
574
591
  header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
575
592
  if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes