numbers-parser 4.7.1__py3-none-any.whl → 4.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numbers_parser/__init__.py +2 -1
- numbers_parser/cell.py +385 -100
- numbers_parser/cell_storage.py +151 -162
- numbers_parser/constants.py +165 -1
- numbers_parser/document.py +932 -228
- numbers_parser/formula.py +10 -10
- numbers_parser/model.py +291 -124
- numbers_parser-4.8.0.dist-info/METADATA +378 -0
- {numbers_parser-4.7.1.dist-info → numbers_parser-4.8.0.dist-info}/RECORD +12 -12
- {numbers_parser-4.7.1.dist-info → numbers_parser-4.8.0.dist-info}/WHEEL +1 -1
- numbers_parser-4.7.1.dist-info/METADATA +0 -626
- {numbers_parser-4.7.1.dist-info → numbers_parser-4.8.0.dist-info}/LICENSE.rst +0 -0
- {numbers_parser-4.7.1.dist-info → numbers_parser-4.8.0.dist-info}/entry_points.txt +0 -0
numbers_parser/cell_storage.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import math
|
|
3
3
|
import re
|
|
4
|
-
from collections import OrderedDict
|
|
5
4
|
from fractions import Fraction
|
|
6
5
|
from struct import unpack
|
|
7
|
-
from typing import Tuple
|
|
6
|
+
from typing import Tuple, Union
|
|
8
7
|
from warnings import warn
|
|
9
8
|
|
|
10
9
|
import sigfig
|
|
@@ -13,6 +12,8 @@ from pendulum import datetime, duration
|
|
|
13
12
|
from numbers_parser import __name__ as numbers_parser_name
|
|
14
13
|
from numbers_parser.constants import (
|
|
15
14
|
CURRENCY_CELL_TYPE,
|
|
15
|
+
CUSTOM_TEXT_PLACEHOLDER,
|
|
16
|
+
DATETIME_FIELD_MAP,
|
|
16
17
|
DECIMAL_PLACES_AUTO,
|
|
17
18
|
EPOCH,
|
|
18
19
|
MAX_SIGNIFICANT_DIGITS,
|
|
@@ -22,14 +23,14 @@ from numbers_parser.constants import (
|
|
|
22
23
|
SECONDS_IN_WEEK,
|
|
23
24
|
CellPadding,
|
|
24
25
|
CellType,
|
|
26
|
+
CustomFormattingType,
|
|
25
27
|
DurationStyle,
|
|
26
28
|
DurationUnits,
|
|
29
|
+
FormattingType,
|
|
27
30
|
FormatType,
|
|
28
|
-
NegativeNumberStyle,
|
|
29
31
|
)
|
|
30
|
-
from numbers_parser.currencies import
|
|
32
|
+
from numbers_parser.currencies import CURRENCY_SYMBOLS
|
|
31
33
|
from numbers_parser.exceptions import UnsupportedError, UnsupportedWarning
|
|
32
|
-
from numbers_parser.generated import TSKArchives_pb2 as TSKArchives
|
|
33
34
|
from numbers_parser.generated import TSTArchives_pb2 as TSTArchives
|
|
34
35
|
from numbers_parser.numbers_cache import Cacheable, cache
|
|
35
36
|
from numbers_parser.numbers_uuid import NumbersUUID
|
|
@@ -37,47 +38,6 @@ from numbers_parser.numbers_uuid import NumbersUUID
|
|
|
37
38
|
logger = logging.getLogger(numbers_parser_name)
|
|
38
39
|
debug = logger.debug
|
|
39
40
|
|
|
40
|
-
DATETIME_FIELD_MAP = OrderedDict(
|
|
41
|
-
[
|
|
42
|
-
("a", lambda x: x.strftime("%p").lower()),
|
|
43
|
-
("EEEE", "%A"),
|
|
44
|
-
("EEE", "%a"),
|
|
45
|
-
("yyyy", "%Y"),
|
|
46
|
-
("yy", "%y"),
|
|
47
|
-
("y", "%Y"),
|
|
48
|
-
("MMMM", "%B"),
|
|
49
|
-
("MMM", "%b"),
|
|
50
|
-
("MM", "%m"),
|
|
51
|
-
("M", "%-m"),
|
|
52
|
-
("d", "%-d"),
|
|
53
|
-
("dd", "%d"),
|
|
54
|
-
("DDD", lambda x: str(x.day_of_year).zfill(3)),
|
|
55
|
-
("DD", lambda x: str(x.day_of_year).zfill(2)),
|
|
56
|
-
("D", lambda x: str(x.day_of_year).zfill(1)),
|
|
57
|
-
("HH", "%H"),
|
|
58
|
-
("H", "%-H"),
|
|
59
|
-
("hh", "%I"),
|
|
60
|
-
("h", "%-I"),
|
|
61
|
-
("k", lambda x: str(x.hour).replace("0", "24")),
|
|
62
|
-
("kk", lambda x: str(x.hour).replace("0", "24").zfill(2)),
|
|
63
|
-
("K", lambda x: str(x.hour % 12)),
|
|
64
|
-
("KK", lambda x: str(x.hour % 12).zfill(2)),
|
|
65
|
-
("mm", lambda x: str(x.minute).zfill(2)),
|
|
66
|
-
("m", lambda x: str(x.minute)),
|
|
67
|
-
("ss", "%S"),
|
|
68
|
-
("s", lambda x: str(x.second)),
|
|
69
|
-
("W", lambda x: str(x.week_of_month - 1)),
|
|
70
|
-
("ww", "%W"),
|
|
71
|
-
("G", "AD"), # TODO: support BC
|
|
72
|
-
("F", lambda x: days_occurred_in_month(x)),
|
|
73
|
-
("S", lambda x: str(x.microsecond).zfill(6)[0]),
|
|
74
|
-
("SS", lambda x: str(x.microsecond).zfill(6)[0:2]),
|
|
75
|
-
("SSS", lambda x: str(x.microsecond).zfill(6)[0:3]),
|
|
76
|
-
("SSSS", lambda x: str(x.microsecond).zfill(6)[0:4]),
|
|
77
|
-
("SSSSS", lambda x: str(x.microsecond).zfill(6)[0:5]),
|
|
78
|
-
]
|
|
79
|
-
)
|
|
80
|
-
|
|
81
41
|
|
|
82
42
|
class CellStorage(Cacheable):
|
|
83
43
|
# 15% performance uplift for using slots
|
|
@@ -86,8 +46,8 @@ class CellStorage(Cacheable):
|
|
|
86
46
|
"datetime",
|
|
87
47
|
"model",
|
|
88
48
|
"table_id",
|
|
89
|
-
"
|
|
90
|
-
"
|
|
49
|
+
"row",
|
|
50
|
+
"col",
|
|
91
51
|
"value",
|
|
92
52
|
"type",
|
|
93
53
|
"d128",
|
|
@@ -117,13 +77,13 @@ class CellStorage(Cacheable):
|
|
|
117
77
|
|
|
118
78
|
# @profile
|
|
119
79
|
def __init__( # noqa: PLR0912, PLR0913, PLR0915
|
|
120
|
-
self, model: object, table_id: int, buffer,
|
|
80
|
+
self, model: object, table_id: int, buffer, row, col
|
|
121
81
|
):
|
|
122
82
|
self.buffer = buffer
|
|
123
83
|
self.model = model
|
|
124
84
|
self.table_id = table_id
|
|
125
|
-
self.
|
|
126
|
-
self.
|
|
85
|
+
self.row = row
|
|
86
|
+
self.col = col
|
|
127
87
|
|
|
128
88
|
self.d128 = None
|
|
129
89
|
self.double = None
|
|
@@ -258,15 +218,41 @@ class CellStorage(Cacheable):
|
|
|
258
218
|
else:
|
|
259
219
|
raise UnsupportedError(f"Cell type ID {cell_type} is not recognised")
|
|
260
220
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
table_id
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
221
|
+
if logging.getLogger(__package__).level == logging.DEBUG:
|
|
222
|
+
# Guard to reduce expense of computing fields
|
|
223
|
+
extras = unpack("<H", buffer[6:8])[0]
|
|
224
|
+
table_name = model.table_name(table_id)
|
|
225
|
+
sheet_name = model.sheet_name(model.table_id_to_sheet_id(table_id))
|
|
226
|
+
fields = [
|
|
227
|
+
f"{x}=" + str(getattr(self, x)) if getattr(self, x) is not None else None
|
|
228
|
+
for x in self.__slots__
|
|
229
|
+
if x.endswith("_id")
|
|
230
|
+
]
|
|
231
|
+
fields = ", ".join([x for x in fields if x if not None])
|
|
232
|
+
debug(
|
|
233
|
+
"%s@%s@[%d,%d]: table_id=%d, type=%s, value=%s, flags=%08x, extras=%04x, %s",
|
|
234
|
+
sheet_name,
|
|
235
|
+
table_name,
|
|
236
|
+
row,
|
|
237
|
+
col,
|
|
238
|
+
table_id,
|
|
239
|
+
self.type.name,
|
|
240
|
+
self.value,
|
|
241
|
+
flags,
|
|
242
|
+
extras,
|
|
243
|
+
fields,
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
def update_value(self, value, cell: object) -> None:
    """Store a new value in this cell storage according to the type of *cell*.

    ``cell._type`` is compared with the ``TSTArchives`` cell type constants:
    number cells also set ``d128`` and date cells also set ``datetime``
    (both update ``type``); duration cells also set ``double``.
    """
    if cell._type == TSTArchives.numberCellType:
        self.d128 = value
        self.type = CellType.NUMBER
    elif cell._type == TSTArchives.dateCellType:
        self.datetime = value
        self.type = CellType.DATE
    elif cell._type == TSTArchives.durationCellType:
        # Only the raw double is stored here; ``type`` is left unchanged.
        self.double = value
    # NOTE(review): indentation is not visible in this view; this assignment
    # is assumed to run for every cell type -- confirm against the package.
    self.value = value
|
|
270
256
|
|
|
271
257
|
@property
|
|
272
258
|
def formatted(self):
|
|
@@ -309,7 +295,7 @@ class CellStorage(Cacheable):
|
|
|
309
295
|
else:
|
|
310
296
|
return (self.model.objects.file_store[image_pathnames[0]], preferred_filename)
|
|
311
297
|
|
|
312
|
-
def custom_format(self) -> str:
|
|
298
|
+
def custom_format(self) -> str: # noqa: PLR0911
|
|
313
299
|
if self.text_format_id is not None and self.type == CellType.TEXT:
|
|
314
300
|
format = self.model.table_format(self.table_id, self.text_format_id)
|
|
315
301
|
elif self.currency_format_id is not None:
|
|
@@ -320,17 +306,13 @@ class CellStorage(Cacheable):
|
|
|
320
306
|
format = self.model.table_format(self.table_id, self.bool_format_id)
|
|
321
307
|
else:
|
|
322
308
|
return str(self.value)
|
|
309
|
+
|
|
323
310
|
if format.HasField("custom_uid"):
|
|
324
311
|
format_uuid = NumbersUUID(format.custom_uid).hex
|
|
325
312
|
format_map = self.model.custom_format_map()
|
|
326
313
|
custom_format = format_map[format_uuid].default_format
|
|
327
314
|
if custom_format.requires_fraction_replacement:
|
|
328
|
-
|
|
329
|
-
if accuracy & 0xFF000000:
|
|
330
|
-
num_digits = 0x100000000 - accuracy
|
|
331
|
-
formatted_value = float_to_n_digit_fraction(self.d128, num_digits)
|
|
332
|
-
else:
|
|
333
|
-
formatted_value = float_to_fraction(self.d128, accuracy)
|
|
315
|
+
formatted_value = format_fraction(self.d128, custom_format)
|
|
334
316
|
elif custom_format.format_type == FormatType.CUSTOM_TEXT:
|
|
335
317
|
formatted_value = decode_text_format(
|
|
336
318
|
custom_format,
|
|
@@ -346,6 +328,14 @@ class CellStorage(Cacheable):
|
|
|
346
328
|
return format_currency(self.d128, format)
|
|
347
329
|
elif format.format_type == FormatType.BOOLEAN:
|
|
348
330
|
return "TRUE" if self.value else "FALSE"
|
|
331
|
+
elif format.format_type == FormatType.PERCENT:
|
|
332
|
+
return format_decimal(self.d128 * 100, format, percent=True)
|
|
333
|
+
elif format.format_type == FormatType.BASE:
|
|
334
|
+
return format_base(self.d128, format)
|
|
335
|
+
elif format.format_type == FormatType.FRACTION:
|
|
336
|
+
return format_fraction(self.d128, format)
|
|
337
|
+
elif format.format_type == FormatType.SCIENTIFIC:
|
|
338
|
+
return format_scientific(self.d128, format)
|
|
349
339
|
else:
|
|
350
340
|
formatted_value = str(self.value)
|
|
351
341
|
return formatted_value
|
|
@@ -441,63 +431,18 @@ class CellStorage(Cacheable):
|
|
|
441
431
|
|
|
442
432
|
return duration_str
|
|
443
433
|
|
|
444
|
-
def
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
self.date_format_id = self.model.table_format_id(self.table_id, format_archive)
|
|
450
|
-
|
|
451
|
-
def set_number_formatting(self, number_format) -> None:
|
|
452
|
-
check_number_format(number_format)
|
|
453
|
-
if "decimal_places" not in number_format or number_format["decimal_places"] is None:
|
|
454
|
-
decimal_places = DECIMAL_PLACES_AUTO
|
|
455
|
-
else:
|
|
456
|
-
decimal_places = number_format["decimal_places"]
|
|
457
|
-
|
|
458
|
-
if "negative_style" not in number_format:
|
|
459
|
-
negative_style = NegativeNumberStyle.MINUS
|
|
460
|
-
elif "use_accounting_style" in number_format:
|
|
461
|
-
warn(
|
|
462
|
-
"Use of use_accounting_style overrules negative_style",
|
|
463
|
-
RuntimeWarning,
|
|
464
|
-
stacklevel=4,
|
|
465
|
-
)
|
|
466
|
-
negative_style = NegativeNumberStyle.MINUS
|
|
467
|
-
else:
|
|
468
|
-
negative_style = number_format["negative_style"]
|
|
469
|
-
|
|
470
|
-
if "show_thousands_separator" not in number_format:
|
|
471
|
-
show_thousands_separator = False
|
|
472
|
-
else:
|
|
473
|
-
show_thousands_separator = number_format["show_thousands_separator"]
|
|
474
|
-
|
|
475
|
-
if "currency_code" in number_format:
|
|
476
|
-
format_type = FormatType.CURRENCY
|
|
477
|
-
else:
|
|
478
|
-
format_type = FormatType.DECIMAL
|
|
479
|
-
|
|
480
|
-
attrs = {
|
|
481
|
-
"format_type": format_type,
|
|
482
|
-
"decimal_places": decimal_places,
|
|
483
|
-
"negative_style": negative_style,
|
|
484
|
-
"show_thousands_separator": show_thousands_separator,
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
if format_type == FormatType.CURRENCY:
|
|
434
|
+
def _set_formatting(
    self, format_id: int, format_type: Union[FormattingType, CustomFormattingType]
) -> None:
    """Record *format_id* in the storage slot that matches *format_type*.

    Currency formats also flag the cell as a currency cell. Built-in and
    custom date/time formats share ``date_format_id``, custom text formats
    use ``text_format_id`` and every other type uses ``num_format_id``.
    """
    if format_type == FormattingType.CURRENCY:
        self.currency_format_id = format_id
        self.is_currency = True
        return

    date_types = [FormattingType.DATETIME, CustomFormattingType.DATETIME]
    if format_type in date_types:
        slot = "date_format_id"
    elif format_type == CustomFormattingType.TEXT:
        slot = "text_format_id"
    else:
        slot = "num_format_id"
    setattr(self, slot, format_id)
|
|
501
446
|
|
|
502
447
|
|
|
503
448
|
def unpack_decimal128(buffer: bytearray) -> float:
|
|
@@ -512,12 +457,6 @@ def unpack_decimal128(buffer: bytearray) -> float:
|
|
|
512
457
|
return float(value)
|
|
513
458
|
|
|
514
459
|
|
|
515
|
-
def days_occurred_in_month(value: datetime) -> str:
|
|
516
|
-
"""Return how many times the day of the datetime value has fallen in the month."""
|
|
517
|
-
n_days = int((value - value.replace(day=1)).days / 7) + 1
|
|
518
|
-
return str(n_days)
|
|
519
|
-
|
|
520
|
-
|
|
521
460
|
def decode_date_format_field(field: str, value: datetime) -> str:
|
|
522
461
|
if field in DATETIME_FIELD_MAP:
|
|
523
462
|
s = DATETIME_FIELD_MAP[field]
|
|
@@ -581,7 +520,7 @@ def decode_date_format(format, value):
|
|
|
581
520
|
def decode_text_format(format, value: str):
    """Insert *value* into a custom text format string.

    Every occurrence of ``CUSTOM_TEXT_PLACEHOLDER`` in
    ``format.custom_format_string`` is replaced by *value* and the resulting
    string is returned.
    """
    custom_format_string = format.custom_format_string
    return custom_format_string.replace(CUSTOM_TEXT_PLACEHOLDER, value)
|
|
585
524
|
|
|
586
525
|
|
|
587
526
|
def expand_quotes(value: str) -> str:
|
|
@@ -751,7 +690,7 @@ def decode_number_format(format, value, name): # noqa: PLR0912
|
|
|
751
690
|
return expand_quotes(formatted_value)
|
|
752
691
|
|
|
753
692
|
|
|
754
|
-
def format_decimal(value: float, format) -> str:
|
|
693
|
+
def format_decimal(value: float, format, percent: bool = False) -> str:
|
|
755
694
|
if value is None:
|
|
756
695
|
return ""
|
|
757
696
|
if value < 0 and format.negative_style == 1:
|
|
@@ -782,6 +721,9 @@ def format_decimal(value: float, format) -> str:
|
|
|
782
721
|
except ValueError:
|
|
783
722
|
pass
|
|
784
723
|
|
|
724
|
+
if percent:
|
|
725
|
+
formatted_value += "%"
|
|
726
|
+
|
|
785
727
|
if accounting_style:
|
|
786
728
|
return f"({formatted_value})"
|
|
787
729
|
else:
|
|
@@ -802,17 +744,75 @@ def format_currency(value: float, format) -> str:
|
|
|
802
744
|
return symbol + formatted_value
|
|
803
745
|
|
|
804
746
|
|
|
747
|
+
# Digit characters indexed by digit value: "0"-"9" followed by "A"-"Z".
INT_TO_BASE_CHAR = [
    *(str(digit) for digit in range(10)),
    *(chr(code) for code in range(ord("A"), ord("Z") + 1)),
]
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def invert_bit_str(value: str) -> str:
    """Return *value* with each "1" replaced by "0" and every other character by "1"."""
    flipped = {"1": "0"}
    return "".join(flipped.get(bit, "1") for bit in value)
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def twos_complement(value: int, base: int) -> str:
    """Return the two's complement of a negative integer as a digit string.

    The word size is the smallest number of bits that can hold *value* as a
    signed integer, but never fewer than 32.

    Args:
        value: The (negative) integer to encode; only its magnitude is used.
        base: 2 for binary digits or 8 for octal digits; any other value
            gives upper-case hexadecimal digits.

    Returns:
        The encoded value without a ``0b``/``0o``/``0x`` prefix.

    Raises:
        ValueError: If *value* is zero.
    """
    magnitude = abs(value)
    if magnitude == 0:
        # Zero has no negative encoding; keep raising ValueError as before.
        raise ValueError("math domain error")

    # Exact integer form of ceil(log2(magnitude)) + 1. math.log2() works in
    # floating point and rounds magnitudes just above a large power of two
    # down, which made the word one bit too narrow and lost the sign bit.
    num_bits = max(32, (magnitude - 1).bit_length() + 1)
    # Inverting all num_bits bits of the magnitude and adding one is the same
    # as subtracting the magnitude from 2**num_bits.
    twos_complement_dec = (1 << num_bits) - magnitude

    if base == 2:
        return bin(twos_complement_dec)[2:].rjust(num_bits, "1")
    elif base == 8:
        return oct(twos_complement_dec)[2:]
    else:
        return hex(twos_complement_dec)[2:].upper()
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def format_base(value: float, format) -> str:
    """Format *value* as an integer written in the base given by *format*.

    Args:
        value: The number to format; it is rounded to the nearest integer.
        format: Object providing ``base``, ``base_places`` (minimum number
            of digits, zero-padded) and ``base_use_minus_sign``.

    Returns:
        The digits of the rounded value. Negative values in base 2, 8 or 16
        are returned in two's complement unless ``base_use_minus_sign`` is
        set; all other negative values get a leading ``-``.
    """
    # Round before testing for zero: a value such as 0.4 rounds to 0 and
    # would otherwise skip the digit loop and yield an empty string.
    value = round(value)
    if value == 0:
        return "0".zfill(format.base_places)

    if value < 0 and not format.base_use_minus_sign and format.base in [2, 8, 16]:
        return twos_complement(value, format.base)

    is_negative = value < 0
    value = abs(value)

    # Collect digits least-significant first, then reverse.
    digits = []
    while value:
        value, digit = divmod(value, format.base)
        digits.append(INT_TO_BASE_CHAR[digit])
    formatted_value = "".join(reversed(digits)).zfill(format.base_places)

    return "-" + formatted_value if is_negative else formatted_value
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def format_fraction_parts_to(whole: int, numerator: int, denominator: int):
    """Render the parts of a mixed fraction as text.

    Args:
        whole: Integer part of the value.
        numerator: Numerator of the fractional part; for negative values it
            carries the same sign as ``whole``.
        denominator: Denominator of the fractional part. It is used as
            given; the fraction is not reduced.

    Returns:
        Text such as ``"3 1/4"``, ``"1/2"``, ``"0"`` or ``"-1 1/2"``.
    """
    # Format the magnitude and restore the sign at the end. Comparing the
    # signed parts with zero dropped the whole part of negative values.
    is_negative = whole < 0 or numerator < 0
    whole, numerator = abs(whole), abs(numerator)

    # A fractional part that rounded up to a full unit belongs in the whole
    # part ("2" rather than "1 2/2").
    if numerator and numerator == denominator:
        whole += 1
        numerator = 0

    if numerator == 0:
        formatted_value = str(whole)
    elif whole > 0:
        formatted_value = f"{whole} {numerator}/{denominator}"
    else:
        formatted_value = f"{numerator}/{denominator}"

    return "-" + formatted_value if is_negative else formatted_value
|
|
809
|
+
|
|
810
|
+
|
|
805
811
|
def float_to_fraction(value: float, denominator: int) -> str:
    """Format *value* as a mixed fraction over the fixed *denominator*."""
    integer_part = int(value)
    remainder = value - integer_part
    # The remainder is expressed in units of 1/denominator, rounded.
    return format_fraction_parts_to(
        integer_part, round(denominator * remainder), denominator
    )
|
|
816
816
|
|
|
817
817
|
|
|
818
818
|
def float_to_n_digit_fraction(value: float, max_digits: int) -> str:
|
|
@@ -825,13 +825,21 @@ def float_to_n_digit_fraction(value: float, max_digits: int) -> str:
|
|
|
825
825
|
)
|
|
826
826
|
whole = int(value)
|
|
827
827
|
numerator -= whole * denominator
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
828
|
+
return format_fraction_parts_to(whole, numerator, denominator)
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def format_fraction(value: float, format) -> str:
    """Format *value* as a fraction using ``format.fraction_accuracy``.

    An accuracy with any of its top eight bits set encodes a maximum number
    of denominator digits as ``0x100000000 - accuracy``; any other accuracy
    is used directly as the denominator.
    """
    accuracy = format.fraction_accuracy
    if not accuracy & 0xFF000000:
        return float_to_fraction(value, accuracy)
    max_digits = 0x100000000 - accuracy
    return float_to_n_digit_fraction(value, max_digits)
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def format_scientific(value: float, format) -> str:
    """Format *value* in ``E`` notation with ``format.decimal_places`` decimals.

    The value is first rounded to ``MAX_SIGNIFICANT_DIGITS`` significant
    figures with ``sigfig.round``.
    """
    rounded = sigfig.round(value, sigfigs=MAX_SIGNIFICANT_DIGITS, warn=False)
    precision = format.decimal_places
    # The ``format`` parameter shadows the builtin, so use str.format().
    return "{0:.{1}E}".format(rounded, precision)
|
|
835
843
|
|
|
836
844
|
|
|
837
845
|
def unit_format(unit: str, value: int, style: int, abbrev: str = None):
|
|
@@ -881,22 +889,3 @@ def auto_units(cell_value, format):
|
|
|
881
889
|
unit_smallest = unit_largest
|
|
882
890
|
|
|
883
891
|
return unit_smallest, unit_largest
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
def check_date_time_format(format: str) -> None:
|
|
887
|
-
formats = re.sub(r"[^a-zA-Z\s]", " ", format).split()
|
|
888
|
-
for el in formats:
|
|
889
|
-
if el not in DATETIME_FIELD_MAP:
|
|
890
|
-
raise TypeError(f"Invalid format specifier '{el}' in date/time format")
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
def check_number_format(format: dict) -> None:
|
|
894
|
-
for key in format:
|
|
895
|
-
if key not in [
|
|
896
|
-
"decimal_places",
|
|
897
|
-
"negative_style",
|
|
898
|
-
"show_thousands_separator",
|
|
899
|
-
"currency_code",
|
|
900
|
-
"use_accounting_style",
|
|
901
|
-
]:
|
|
902
|
-
raise TypeError(f"Invalid format specifier '{key}' in number format")
|
numbers_parser/constants.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
1
2
|
from enum import IntEnum
|
|
2
3
|
|
|
4
|
+
import enum_tools.documentation
|
|
3
5
|
from pendulum import datetime
|
|
4
6
|
|
|
5
7
|
try:
|
|
@@ -8,6 +10,18 @@ try:
|
|
|
8
10
|
except ImportError: # pragma: nocover
|
|
9
11
|
from importlib_resources import files
|
|
10
12
|
|
|
13
|
+
# Names exported by ``from numbers_parser.constants import *``.
# NOTE(review): CustomFormattingType is defined in this module but is not
# listed here -- confirm whether that omission is intentional.
__all__ = [
    "CellType",
    "PaddingType",
    "CellPadding",
    "DurationStyle",
    "DurationUnits",
    "FormatType",
    "FormattingType",
    "NegativeNumberStyle",
    "FractionAccuracy",
]
|
|
24
|
+
|
|
11
25
|
DEFAULT_DOCUMENT = files("numbers_parser") / "data" / "empty.numbers"
|
|
12
26
|
|
|
13
27
|
# New document defaults
|
|
@@ -16,7 +30,6 @@ DEFAULT_COLUMN_WIDTH = 98.0
|
|
|
16
30
|
DEFAULT_PRE_BNC_BYTES = "🤠".encode() # Yes, really!
|
|
17
31
|
DEFAULT_ROW_COUNT = 12
|
|
18
32
|
DEFAULT_ROW_HEIGHT = 20.0
|
|
19
|
-
DEFAULT_NUM_HEADERS = 1
|
|
20
33
|
DEFAULT_TABLE_OFFSET = 80.0
|
|
21
34
|
DEFAULT_TILE_SIZE = 256
|
|
22
35
|
|
|
@@ -31,12 +44,16 @@ DEFAULT_TEXT_INSET = 4.0
|
|
|
31
44
|
DEFAULT_TEXT_WRAP = True
|
|
32
45
|
EMPTY_STORAGE_BUFFER = b"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
|
33
46
|
|
|
47
|
+
# Formatting defaults
|
|
48
|
+
# NOTE(review): "YYY" is not a key of DATETIME_FIELD_MAP (which has "y", "yy"
# and "yyyy"), and "MM" there is the month while minutes are "mm" -- confirm
# this default is not meant to be "dd MMM yyyy HH:mm".
DEFAULT_DATETIME_FORMAT = "dd MMM YYY HH:MM"
|
|
49
|
+
|
|
34
50
|
# Numbers limits
|
|
35
51
|
MAX_TILE_SIZE = 256
|
|
36
52
|
MAX_ROW_COUNT = 1000000
|
|
37
53
|
MAX_COL_COUNT = 1000
|
|
38
54
|
MAX_HEADER_COUNT = 5
|
|
39
55
|
MAX_SIGNIFICANT_DIGITS = 15
|
|
56
|
+
MAX_BASE = 36
|
|
40
57
|
|
|
41
58
|
# Root object IDs
|
|
42
59
|
DOCUMENT_ID = 1
|
|
@@ -51,6 +68,56 @@ SECONDS_IN_WEEK = SECONDS_IN_DAY * 7
|
|
|
51
68
|
# File format enumerations
|
|
52
69
|
DECIMAL_PLACES_AUTO = 253
|
|
53
70
|
CURRENCY_CELL_TYPE = 10
|
|
71
|
+
CUSTOM_TEXT_PLACEHOLDER = "\ue421"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Supported date/time directives
|
|
75
|
+
def _days_occurred_in_month(value: datetime) -> str:
|
|
76
|
+
"""Return how many times the day of the datetime value has fallen in the month."""
|
|
77
|
+
n_days = int((value - value.replace(day=1)).days / 7) + 1
|
|
78
|
+
return str(n_days)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Maps each supported date/time format field to either a strftime directive
# (a string) or a callable that takes the datetime and returns the text.
# The entry order is preserved from the original definition.
DATETIME_FIELD_MAP = OrderedDict(
    [
        ("a", lambda x: x.strftime("%p").lower()),
        ("EEEE", "%A"),
        ("EEE", "%a"),
        ("yyyy", "%Y"),
        ("yy", "%y"),
        ("y", "%Y"),
        ("MMMM", "%B"),
        ("MMM", "%b"),
        ("MM", "%m"),
        ("M", "%-m"),
        ("d", "%-d"),
        ("dd", "%d"),
        ("DDD", lambda x: str(x.day_of_year).zfill(3)),
        ("DD", lambda x: str(x.day_of_year).zfill(2)),
        ("D", lambda x: str(x.day_of_year).zfill(1)),
        ("HH", "%H"),
        ("H", "%-H"),
        ("hh", "%I"),
        ("h", "%-I"),
        # Hours 1-24: only hour 0 becomes 24. Replacing the character "0"
        # in the hour text turned 10 into "124" and 20 into "224".
        ("k", lambda x: str(x.hour or 24)),
        ("kk", lambda x: str(x.hour or 24).zfill(2)),
        ("K", lambda x: str(x.hour % 12)),
        ("KK", lambda x: str(x.hour % 12).zfill(2)),
        ("mm", lambda x: str(x.minute).zfill(2)),
        ("m", lambda x: str(x.minute)),
        ("ss", "%S"),
        ("s", lambda x: str(x.second)),
        ("W", lambda x: str(x.week_of_month - 1)),
        ("ww", "%W"),
        ("G", "AD"),  # TODO: support BC
        ("F", lambda x: _days_occurred_in_month(x)),
        # Leading digits of the zero-padded microsecond count.
        ("S", lambda x: str(x.microsecond).zfill(6)[0]),
        ("SS", lambda x: str(x.microsecond).zfill(6)[0:2]),
        ("SSS", lambda x: str(x.microsecond).zfill(6)[0:3]),
        ("SSSS", lambda x: str(x.microsecond).zfill(6)[0:4]),
        ("SSSSS", lambda x: str(x.microsecond).zfill(6)[0:5]),
    ]
)
|
|
54
121
|
|
|
55
122
|
|
|
56
123
|
class CellType(IntEnum):
|
|
@@ -90,6 +157,7 @@ class FormatType(IntEnum):
|
|
|
90
157
|
DECIMAL = 256
|
|
91
158
|
CURRENCY = 257
|
|
92
159
|
PERCENT = 258
|
|
160
|
+
SCIENTIFIC = 259
|
|
93
161
|
TEXT = 260
|
|
94
162
|
DATE = 261
|
|
95
163
|
FRACTION = 262
|
|
@@ -103,8 +171,104 @@ class FormatType(IntEnum):
|
|
|
103
171
|
CUSTOM_CURRENCY = 274
|
|
104
172
|
|
|
105
173
|
|
|
174
|
+
class FormattingType(IntEnum):
    """Built-in cell formatting categories.

    Each member is a key of ``ALLOWED_FORMATTING_PARAMETERS`` (the keyword
    arguments accepted for that category) and of ``FORMAT_TYPE_MAP`` (the
    corresponding ``FormatType`` value).
    """

    BASE = 1
    CURRENCY = 2
    DATETIME = 3
    FRACTION = 4
    NUMBER = 5
    PERCENTAGE = 6
    SCIENTIFIC = 7
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class CustomFormattingType(IntEnum):
    """Custom cell formatting categories.

    Each member is a key of ``CUSTOM_FORMAT_TYPE_MAP``, which gives the
    corresponding ``FormatType.CUSTOM_*`` value. The values do not overlap
    with those of ``FormattingType``.
    """

    NUMBER = 101
    DATETIME = 102
    TEXT = 103
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# NOTE(review): the string literal after each member is presumably picked up
# by ``document_enum`` as that member's docstring -- confirm against the
# enum_tools documentation.
@enum_tools.documentation.document_enum
class NegativeNumberStyle(IntEnum):
    """
    How negative numbers are formatted.

    This enum is used in cell data formats and cell custom formats using the
    `negative_style` keyword argument.
    """

    MINUS = 0
    """Negative numbers use a simple minus sign."""
    RED = 1
    """Negative numbers are red with no minus sign."""
    PARENTHESES = 2
    """Negative numbers are in parentheses with no minus sign."""
    RED_AND_PARENTHESES = 3
    """Negative numbers are red and in parentheses with no minus sign."""
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# The 0xFFFFFFFx members encode a maximum number of denominator digits as
# 0x100000000 - value (3, 2 and 1); the small values are fixed denominators.
# NOTE(review): "EIGTHS" and "HUNDRETHS" are misspelled, but they are public
# member names and are kept as-is here.
@enum_tools.documentation.document_enum
class FractionAccuracy(IntEnum):
    """
    How fractions are formatted.

    This enum is used in cell data formats and cell custom formats using the
    `fraction_accuracy` keyword argument.
    """

    THREE = 0xFFFFFFFD
    """Fractions are formatted with up to 3 digits in the denominator."""
    TWO = 0xFFFFFFFE
    """Fractions are formatted with up to 2 digits in the denominator."""
    ONE = 0xFFFFFFFF
    """Fractions are formatted with up to 1 digit in the denominator."""
    HALVES = 2
    """Fractions are formatted to the nearest half."""
    QUARTERS = 4
    """Fractions are formatted to the nearest quarter."""
    EIGTHS = 8
    """Fractions are formatted to the nearest eighth."""
    SIXTEENTHS = 16
    """Fractions are formatted to the nearest sixteenth."""
    TENTHS = 10
    """Fractions are formatted to the nearest tenth."""
    HUNDRETHS = 100
    """Fractions are formatted to the nearest hundredth."""
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# Parameter names associated with each built-in formatting type.
ALLOWED_FORMATTING_PARAMETERS = {
    FormattingType.BASE: ["base", "base_places", "base_use_minus_sign"],
    FormattingType.CURRENCY: [
        "decimal_places",
        "show_thousands_separator",
        "negative_style",
        "use_accounting_style",
        "currency_code",
    ],
    FormattingType.DATETIME: ["date_time_format"],
    FormattingType.FRACTION: ["fraction_accuracy"],
    FormattingType.NUMBER: ["decimal_places", "show_thousands_separator", "negative_style"],
    FormattingType.PERCENTAGE: ["decimal_places", "show_thousands_separator", "negative_style"],
    FormattingType.SCIENTIFIC: ["decimal_places"],
}

# Built-in formatting type -> FormatType value.
FORMAT_TYPE_MAP = {
    FormattingType.BASE: FormatType.BASE,
    FormattingType.CURRENCY: FormatType.CURRENCY,
    FormattingType.DATETIME: FormatType.DATE,
    FormattingType.FRACTION: FormatType.FRACTION,
    FormattingType.NUMBER: FormatType.DECIMAL,
    FormattingType.PERCENTAGE: FormatType.PERCENT,
    FormattingType.SCIENTIFIC: FormatType.SCIENTIFIC,
}

# Custom formatting type -> FormatType.CUSTOM_* value.
CUSTOM_FORMAT_TYPE_MAP = {
    CustomFormattingType.NUMBER: FormatType.CUSTOM_NUMBER,
    CustomFormattingType.DATETIME: FormatType.CUSTOM_DATE,
    CustomFormattingType.TEXT: FormatType.CUSTOM_TEXT,
}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class PaddingType(IntEnum):
|
|
272
|
+
NONE = 0
|
|
273
|
+
ZEROS = 1
|
|
274
|
+
SPACES = 2
|