TSVZ 3.23__tar.gz → 3.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsvz-3.23 → tsvz-3.25}/PKG-INFO +2 -2
- {tsvz-3.23 → tsvz-3.25}/TSVZ.egg-info/PKG-INFO +2 -2
- {tsvz-3.23 → tsvz-3.25}/TSVZ.py +29 -12
- {tsvz-3.23 → tsvz-3.25}/README.md +0 -0
- {tsvz-3.23 → tsvz-3.25}/TSVZ.egg-info/SOURCES.txt +0 -0
- {tsvz-3.23 → tsvz-3.25}/TSVZ.egg-info/dependency_links.txt +0 -0
- {tsvz-3.23 → tsvz-3.25}/TSVZ.egg-info/entry_points.txt +0 -0
- {tsvz-3.23 → tsvz-3.25}/TSVZ.egg-info/top_level.txt +0 -0
- {tsvz-3.23 → tsvz-3.25}/setup.cfg +0 -0
- {tsvz-3.23 → tsvz-3.25}/setup.py +0 -0
{tsvz-3.23 → tsvz-3.25}/PKG-INFO
RENAMED
{tsvz-3.23 → tsvz-3.25}/TSVZ.py
RENAMED
|
@@ -22,7 +22,7 @@ if os.name == 'nt':
|
|
|
22
22
|
elif os.name == 'posix':
|
|
23
23
|
import fcntl
|
|
24
24
|
|
|
25
|
-
version = '3.23'
|
|
25
|
+
version = '3.25'
|
|
26
26
|
__version__ = version
|
|
27
27
|
author = 'pan@zopyr.us'
|
|
28
28
|
|
|
@@ -128,6 +128,7 @@ def pretty_format_table(data, delimiter = DEFAULT_DELIMITER,header = None):
|
|
|
128
128
|
def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_format='.2f'):
|
|
129
129
|
"""
|
|
130
130
|
Format the size in bytes to a human-readable format or vice versa.
|
|
131
|
+
From hpcp: https://github.com/yufei-pan/hpcp
|
|
131
132
|
|
|
132
133
|
Args:
|
|
133
134
|
size (int or str): The size in bytes or a string representation of the size.
|
|
@@ -140,10 +141,16 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
140
141
|
int or str: The formatted size based on the provided arguments.
|
|
141
142
|
|
|
142
143
|
Examples:
|
|
143
|
-
>>> format_bytes(1500)
|
|
144
|
-
'1.50
|
|
144
|
+
>>> format_bytes(1500, use_1024_bytes=False)
|
|
145
|
+
'1.50 K'
|
|
145
146
|
>>> format_bytes('1.5 GiB', to_int=True)
|
|
146
147
|
1610612736
|
|
148
|
+
>>> format_bytes('1.5 GiB', to_str=True)
|
|
149
|
+
'1.50 Gi'
|
|
150
|
+
>>> format_bytes(1610612736, use_1024_bytes=True, to_str=True)
|
|
151
|
+
'1.50 Gi'
|
|
152
|
+
>>> format_bytes(1610612736, use_1024_bytes=False, to_str=True)
|
|
153
|
+
'1.61 G'
|
|
147
154
|
"""
|
|
148
155
|
if to_int or isinstance(size, str):
|
|
149
156
|
if isinstance(size, int):
|
|
@@ -152,6 +159,8 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
152
159
|
# Use regular expression to split the numeric part from the unit, handling optional whitespace
|
|
153
160
|
match = re.match(r"(\d+(\.\d+)?)\s*([a-zA-Z]*)", size)
|
|
154
161
|
if not match:
|
|
162
|
+
if to_str:
|
|
163
|
+
return size
|
|
155
164
|
print("Invalid size format. Expected format: 'number [unit]', e.g., '1.5 GiB' or '1.5GiB'")
|
|
156
165
|
print(f"Got: {size}")
|
|
157
166
|
return 0
|
|
@@ -171,8 +180,12 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
171
180
|
power = 10**3
|
|
172
181
|
unit_labels = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
|
|
173
182
|
if unit not in unit_labels:
|
|
183
|
+
if to_str:
|
|
184
|
+
return size
|
|
174
185
|
print(f"Invalid unit '{unit}'. Expected one of {list(unit_labels.keys())}")
|
|
175
186
|
return 0
|
|
187
|
+
if to_str:
|
|
188
|
+
return format_bytes(size=int(number * (power ** unit_labels[unit])), use_1024_bytes=use_1024_bytes, to_str=True, str_format=str_format)
|
|
176
189
|
# Calculate the bytes
|
|
177
190
|
return int(number * (power ** unit_labels[unit]))
|
|
178
191
|
else:
|
|
@@ -183,8 +196,8 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
183
196
|
elif to_str or isinstance(size, int) or isinstance(size, float):
|
|
184
197
|
if isinstance(size, str):
|
|
185
198
|
try:
|
|
186
|
-
size = size.
|
|
187
|
-
size = float(size)
|
|
199
|
+
size = size.rstrip('B').rstrip('b')
|
|
200
|
+
size = float(size.lower().strip())
|
|
188
201
|
except Exception as e:
|
|
189
202
|
return size
|
|
190
203
|
# size is in bytes
|
|
@@ -195,7 +208,7 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
195
208
|
while size > power:
|
|
196
209
|
size /= power
|
|
197
210
|
n += 1
|
|
198
|
-
return f"{size:{str_format}} {power_labels[n]}"
|
|
211
|
+
return f"{size:{str_format}}{' '}{power_labels[n]}"
|
|
199
212
|
else:
|
|
200
213
|
power = 10**3
|
|
201
214
|
n = 0
|
|
@@ -203,7 +216,7 @@ def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_forma
|
|
|
203
216
|
while size > power:
|
|
204
217
|
size /= power
|
|
205
218
|
n += 1
|
|
206
|
-
return f"{size:{str_format}} {power_labels[n]}"
|
|
219
|
+
return f"{size:{str_format}}{' '}{power_labels[n]}"
|
|
207
220
|
else:
|
|
208
221
|
try:
|
|
209
222
|
return format_bytes(float(size), use_1024_bytes)
|
|
@@ -267,7 +280,7 @@ def __teePrintOrNot(message,level = 'info',teeLogger = None):
|
|
|
267
280
|
except Exception:
|
|
268
281
|
print(message,flush=True)
|
|
269
282
|
|
|
270
|
-
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults =
|
|
283
|
+
def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,strict = True,delimiter = DEFAULT_DELIMITER,defaults = ...):
|
|
271
284
|
"""
|
|
272
285
|
Process a line of text and update the task dictionary.
|
|
273
286
|
|
|
@@ -284,7 +297,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
284
297
|
tuple: A tuple containing the updated correctColumnNum and the processed lineCache.
|
|
285
298
|
|
|
286
299
|
"""
|
|
287
|
-
if
|
|
300
|
+
if defaults is ...:
|
|
288
301
|
defaults = []
|
|
289
302
|
line = line.strip(' ').strip('\x00').rstrip('\r\n')
|
|
290
303
|
# we throw away the lines that start with '#'
|
|
@@ -354,7 +367,7 @@ def _processLine(line,taskDic,correctColumnNum,verbose = False,teeLogger = None,
|
|
|
354
367
|
__teePrintOrNot(f"Key {lineCache[0]} added",teeLogger=teeLogger)
|
|
355
368
|
return correctColumnNum, lineCache
|
|
356
369
|
|
|
357
|
-
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults =
|
|
370
|
+
def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, teeLogger=None, strict=False,encoding = 'utf8',delimiter = ...,defaults = ...):
|
|
358
371
|
"""
|
|
359
372
|
Reads the last valid line from a file.
|
|
360
373
|
|
|
@@ -374,6 +387,8 @@ def read_last_valid_line(fileName, taskDic, correctColumnNum, verbose=False, tee
|
|
|
374
387
|
"""
|
|
375
388
|
chunk_size = 1024 # Read in chunks of 1024 bytes
|
|
376
389
|
last_valid_line = []
|
|
390
|
+
if defaults is ...:
|
|
391
|
+
defaults = []
|
|
377
392
|
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
378
393
|
if verbose:
|
|
379
394
|
__teePrintOrNot(f"Reading last line only from {fileName}",teeLogger=teeLogger)
|
|
@@ -515,7 +530,7 @@ def _verifyFileExistence(fileName,createIfNotExist = True,teeLogger = None,heade
|
|
|
515
530
|
return False
|
|
516
531
|
return True
|
|
517
532
|
|
|
518
|
-
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults =
|
|
533
|
+
def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = '\t',defaults = ...):
|
|
519
534
|
"""
|
|
520
535
|
Compatibility method, calls readTabularFile.
|
|
521
536
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
@@ -543,7 +558,7 @@ def readTSV(fileName,teeLogger = None,header = '',createIfNotExist = False, last
|
|
|
543
558
|
"""
|
|
544
559
|
return readTabularFile(fileName,teeLogger = teeLogger,header = header,createIfNotExist = createIfNotExist,lastLineOnly = lastLineOnly,verifyHeader = verifyHeader,verbose = verbose,taskDic = taskDic,encoding = encoding,strict = strict,delimiter = delimiter,defaults=defaults)
|
|
545
560
|
|
|
546
|
-
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults =
|
|
561
|
+
def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = False, lastLineOnly = False,verifyHeader = True,verbose = False,taskDic = None,encoding = 'utf8',strict = True,delimiter = ...,defaults = ...):
|
|
547
562
|
"""
|
|
548
563
|
Read a Tabular (CSV / TSV / NSV) file and return the data as a dictionary.
|
|
549
564
|
|
|
@@ -570,6 +585,8 @@ def readTabularFile(fileName,teeLogger = None,header = '',createIfNotExist = Fal
|
|
|
570
585
|
"""
|
|
571
586
|
if taskDic is None:
|
|
572
587
|
taskDic = {}
|
|
588
|
+
if defaults is ...:
|
|
589
|
+
defaults = []
|
|
573
590
|
delimiter = get_delimiter(delimiter,file_name=fileName)
|
|
574
591
|
header = _formatHeader(header,verbose = verbose,teeLogger = teeLogger, delimiter = delimiter)
|
|
575
592
|
if not _verifyFileExistence(fileName,createIfNotExist = createIfNotExist,teeLogger = teeLogger,header = header,encoding = encoding,strict = strict,delimiter=delimiter):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tsvz-3.23 → tsvz-3.25}/setup.py
RENAMED
|
File without changes
|