numbers-parser 4.17.0__py3-none-any.whl → 4.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numbers_parser/formula.py CHANGED
@@ -1,66 +1,13 @@
1
- import math
2
1
  import re
3
2
  import warnings
4
3
  from datetime import datetime, timedelta
5
4
 
6
- from numbers_parser.constants import DECIMAL128_BIAS, OPERATOR_PRECEDENCE
7
5
  from numbers_parser.exceptions import UnsupportedWarning
8
6
  from numbers_parser.generated import TSCEArchives_pb2 as TSCEArchives
9
7
  from numbers_parser.generated.functionmap import FUNCTION_MAP
10
- from numbers_parser.generated.TSCEArchives_pb2 import ASTNodeArrayArchive
11
- from numbers_parser.numbers_uuid import NumbersUUID
12
- from numbers_parser.tokenizer import Token, Tokenizer, parse_numbers_range
13
- from numbers_parser.xrefs import CellRange, CellRangeType
14
8
 
15
9
  FUNCTION_NAME_TO_ID = {v: k for k, v in FUNCTION_MAP.items()}
16
10
 
17
- OPERATOR_MAP = str.maketrans({"×": "*", "÷": "/", "≥": ">=", "≤": "<=", "≠": "<>"})
18
-
19
-
20
- OPERATOR_INFIX_MAP = {
21
- "=": "EQUAL_TO_NODE",
22
- "+": "ADDITION_NODE",
23
- "-": "SUBTRACTION_NODE",
24
- "*": "MULTIPLICATION_NODE",
25
- "/": "DIVISION_NODE",
26
- "&": "CONCATENATION_NODE",
27
- "^": "POWER_NODE",
28
- "==": "EQUAL_TO_NODE",
29
- "<>": "NOT_EQUAL_TO_NODE",
30
- "<": "LESS_THAN_NODE",
31
- ">": "GREATER_THAN_NODE",
32
- "<=": "LESS_THAN_OR_EQUAL_TO_NODE",
33
- ">=": "GREATER_THAN_OR_EQUAL_TO_NODE",
34
- }
35
-
36
- OPERAND_ARCHIVE_MAP = {
37
- Token.RANGE: "range_archive",
38
- Token.NUMBER: "number_archive",
39
- Token.TEXT: "text_archive",
40
- Token.LOGICAL: "logical_archive",
41
- Token.ERROR: "error",
42
- }
43
-
44
- # TODO: Understand what the frozen stick bits do!
45
- FROZEN_STICKY_BIT_MAP = {
46
- (False, False, False, False): None,
47
- (False, True, False, False): (True, False, False, False),
48
- (False, False, False, True): (False, False, True, False),
49
- (False, True, False, True): (True, False, True, False),
50
- (False, False, True, False): None,
51
- (False, False, True, True): (False, False, True, False),
52
- (False, True, True, False): (True, False, False, False),
53
- (False, True, True, True): (True, False, True, False),
54
- (True, False, False, False): None,
55
- (True, True, False, False): (True, False, False, False),
56
- (True, False, False, True): (False, False, True, False),
57
- (True, True, False, True): (True, False, True, False),
58
- (True, False, True, False): None,
59
- (True, True, True, False): (True, False, False, False),
60
- (True, False, True, True): (False, False, True, False),
61
- (True, True, True, True): (True, False, True, False),
62
- }
63
-
64
11
 
65
12
  class Formula(list):
66
13
  def __init__(self, model, table_id, row, col) -> None:
@@ -70,364 +17,6 @@ class Formula(list):
70
17
  self.row = row
71
18
  self.col = col
72
19
 
73
- @classmethod
74
- def from_str(cls, model, table_id, row, col, formula_str) -> int:
75
- """
76
- Create a new formula by parsing a formula string and
77
- return the allocated formula ID.
78
- """
79
- formula = cls(model, table_id, row, col)
80
- formula._tokens = cls.formula_tokens(formula_str)
81
-
82
- model._formulas.add_table(table_id)
83
- formula_attrs = {"AST_node_array": {"AST_node": []}}
84
- ast_node = formula_attrs["AST_node_array"]["AST_node"]
85
-
86
- for token in formula._tokens:
87
- if token.type == Token.FUNC and token.subtype == Token.OPEN:
88
- if token.value not in FUNCTION_NAME_TO_ID:
89
- table_name = model.table_name(table_id)
90
- cell_ref = f"{table_name}@[{row},{col}]"
91
- warnings.warn(
92
- f"{cell_ref}: function {token.value} is not supported.",
93
- UnsupportedWarning,
94
- stacklevel=2,
95
- )
96
- return None
97
-
98
- ast_node.append(
99
- {
100
- "AST_node_type": "FUNCTION_NODE",
101
- "AST_function_node_index": FUNCTION_NAME_TO_ID[token.value],
102
- "AST_function_node_numArgs": token.num_args,
103
- },
104
- )
105
- elif token.type == Token.OPERAND:
106
- func = getattr(formula, OPERAND_ARCHIVE_MAP[token.subtype])
107
- ast_node.append(func(token))
108
-
109
- elif token.type == Token.OP_IN:
110
- ast_node.append({"AST_node_type": OPERATOR_INFIX_MAP[token.value]})
111
-
112
- return model._formulas.lookup_key(
113
- table_id,
114
- TSCEArchives.FormulaArchive(**formula_attrs),
115
- )
116
-
117
- def add_table_xref_info(self, ref: dict[str, CellRange], node: dict) -> None:
118
- if not ref.name_scope_2:
119
- return
120
-
121
- sheet_name = (
122
- ref.name_scope_1
123
- if ref.name_scope_1
124
- else self._model.sheet_name(self._model.table_id_to_sheet_id(self._table_id))
125
- )
126
- table_uuid = self._model.table_name_to_uuid(sheet_name, ref.name_scope_2)
127
- xref_archive = NumbersUUID(table_uuid).protobuf4
128
- node["AST_cross_table_reference_extra_info"] = (
129
- TSCEArchives.ASTNodeArrayArchive.ASTCrossTableReferenceExtraInfoArchive(
130
- table_id=xref_archive,
131
- )
132
- )
133
-
134
- @staticmethod
135
- def _ast_sticky_bits(ref: dict[str, CellRange]) -> dict[str, str]:
136
- return {
137
- "begin_row_is_absolute": ref.row_start_is_abs,
138
- "begin_column_is_absolute": ref.col_start_is_abs,
139
- "end_row_is_absolute": ref.row_end_is_abs,
140
- "end_column_is_absolute": ref.col_end_is_abs,
141
- }
142
-
143
- def range_archive(self, token: "Token") -> dict:
144
- ref = parse_numbers_range(self._model, token.value)
145
-
146
- if ref.range_type == CellRangeType.RANGE:
147
- ast_colon_tract = {
148
- "preserve_rectangular": True,
149
- "relative_row": [{}],
150
- "relative_column": [{}],
151
- "absolute_row": [{}],
152
- "absolute_column": [{}],
153
- }
154
-
155
- if not (ref.col_start_is_abs and ref.col_end_is_abs):
156
- ast_colon_tract["relative_column"][0]["range_begin"] = (
157
- (ref.col_end - self.col) if ref.col_start_is_abs else (ref.col_start - self.col)
158
- )
159
-
160
- if not (ref.col_start_is_abs) and not (ref.col_end_is_abs):
161
- ast_colon_tract["relative_column"][0]["range_end"] = ref.col_end - self.col
162
-
163
- if not (ref.row_start_is_abs and ref.row_end_is_abs):
164
- ast_colon_tract["relative_row"][0]["range_begin"] = ref.row_start - self.row
165
- if ref.row_start != ref.row_end:
166
- ast_colon_tract["relative_row"][0]["range_end"] = ref.row_end - self.row
167
-
168
- if ref.col_start_is_abs or ref.col_end_is_abs:
169
- ast_colon_tract["absolute_column"][0]["range_begin"] = (
170
- ref.col_start if ref.row_start_is_abs else ref.col_end
171
- )
172
-
173
- if ref.col_start_is_abs and ref.col_end_is_abs:
174
- ast_colon_tract["absolute_column"][0]["range_end"] = ref.col_end
175
-
176
- if ref.row_start_is_abs or ref.row_end_is_abs:
177
- ast_colon_tract["absolute_row"][0]["range_begin"] = (
178
- ref.row_start if ref.row_start_is_abs else ref.row_end
179
- )
180
-
181
- if ref.row_start_is_abs and ref.row_end_is_abs:
182
- ast_colon_tract["absolute_row"][0]["range_end"] = ref.row_end
183
-
184
- node = {
185
- "AST_node_type": "COLON_TRACT_NODE",
186
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
187
- "AST_colon_tract": ast_colon_tract,
188
- }
189
-
190
- key = (
191
- ref.col_start_is_abs,
192
- ref.col_end_is_abs,
193
- ref.row_start_is_abs,
194
- ref.row_end_is_abs,
195
- )
196
- ast_frozen_sticky_bits = {}
197
- if FROZEN_STICKY_BIT_MAP[key] is not None:
198
- sticky_bits = FROZEN_STICKY_BIT_MAP[key]
199
- ast_frozen_sticky_bits["begin_column_is_absolute"] = sticky_bits[0]
200
- ast_frozen_sticky_bits["end_column_is_absolute"] = sticky_bits[1]
201
- ast_frozen_sticky_bits["begin_row_is_absolute"] = sticky_bits[2]
202
- ast_frozen_sticky_bits["end_row_is_absolute"] = sticky_bits[3]
203
- node["AST_frozen_sticky_bits"] = ast_frozen_sticky_bits
204
-
205
- for key in ["absolute_row", "relative_row", "absolute_column", "relative_column"]:
206
- if len(ast_colon_tract[key][0].keys()) == 0:
207
- del ast_colon_tract[key]
208
-
209
- self.add_table_xref_info(ref, node)
210
-
211
- return node
212
-
213
- if ref.range_type == CellRangeType.ROW_RANGE:
214
- row_start = ref.row_start if ref.row_start_is_abs else ref.row_start - self.row
215
- row_end = ref.row_end if ref.row_end_is_abs else ref.row_end - self.row
216
-
217
- node = {
218
- "AST_node_type": "COLON_TRACT_NODE",
219
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
220
- "AST_colon_tract": {
221
- "relative_row": [{"range_begin": row_start, "range_end": row_end}],
222
- "absolute_column": [{"range_begin": 0x7FFF}],
223
- "preserve_rectangular": True,
224
- },
225
- }
226
- self.add_table_xref_info(ref, node)
227
- return node
228
-
229
- if ref.range_type == CellRangeType.COL_RANGE:
230
- col_start = ref.col_start if ref.col_start_is_abs else ref.col_start - self.col
231
- col_end = ref.col_end if ref.col_end_is_abs else ref.col_end - self.col
232
-
233
- node = {
234
- "AST_node_type": "COLON_TRACT_NODE",
235
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
236
- "AST_colon_tract": {
237
- "relative_column": [{"range_begin": col_start, "range_end": col_end}],
238
- "absolute_row": [{"range_begin": 2147483647}],
239
- "preserve_rectangular": True,
240
- },
241
- }
242
- self.add_table_xref_info(ref, node)
243
- return node
244
-
245
- if ref.range_type == CellRangeType.NAMED_RANGE:
246
- new_ref = self._model.name_ref_cache.lookup_named_ref(self._table_id, ref)
247
- if new_ref.row_start is not None:
248
- row_start = (
249
- new_ref.row_start if ref.row_start_is_abs else new_ref.row_start - self.row
250
- )
251
- row_end = new_ref.row_end if ref.row_end_is_abs else new_ref.row_end - self.row
252
- node = {
253
- "AST_node_type": "COLON_TRACT_NODE",
254
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
255
- "AST_colon_tract": {
256
- "relative_row": [{"range_begin": row_start, "range_end": row_end}],
257
- "absolute_column": [{"range_begin": 0x7FFF}],
258
- "preserve_rectangular": True,
259
- },
260
- }
261
- else:
262
- col_start = (
263
- new_ref.col_start if ref.col_start_is_abs else new_ref.col_start - self.col
264
- )
265
- col_end = new_ref.col_end if ref.col_end_is_abs else new_ref.col_end - self.col
266
- node = {
267
- "AST_node_type": "COLON_TRACT_NODE",
268
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
269
- "AST_colon_tract": {
270
- "relative_column": [{"range_begin": col_start, "range_end": col_end}],
271
- "absolute_row": [{"range_begin": 2147483647}],
272
- "preserve_rectangular": True,
273
- },
274
- }
275
-
276
- self.add_table_xref_info(ref, node)
277
- return node
278
-
279
- if ref.range_type == CellRangeType.NAMED_ROW_COLUMN:
280
- new_ref = self._model.name_ref_cache.lookup_named_ref(self._table_id, ref)
281
- if new_ref.row_start is not None:
282
- row_start = (
283
- new_ref.row_start if ref.row_start_is_abs else new_ref.row_start - self.row
284
- )
285
- node = {
286
- "AST_node_type": "COLON_TRACT_NODE",
287
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
288
- "AST_colon_tract": {
289
- "relative_column": [{"range_begin": row_start}],
290
- "absolute_row": [{"range_begin": 2147483647}],
291
- "preserve_rectangular": True,
292
- },
293
- }
294
- else:
295
- col_start = (
296
- new_ref.col_start if ref.col_start_is_abs else new_ref.col_start - self.col
297
- )
298
- node = {
299
- "AST_node_type": "COLON_TRACT_NODE",
300
- "AST_sticky_bits": Formula._ast_sticky_bits(ref),
301
- "AST_colon_tract": {
302
- "relative_column": [{"range_begin": col_start}],
303
- "absolute_row": [{"range_begin": 2147483647}],
304
- "preserve_rectangular": True,
305
- },
306
- }
307
- self.add_table_xref_info(ref, node)
308
- return node
309
-
310
- # CellRangeType.CELL
311
- return {
312
- "AST_node_type": "CELL_REFERENCE_NODE",
313
- "AST_row": {
314
- "row": ref.row_start if ref.row_start_is_abs else ref.row_start - self.row,
315
- "absolute": ref.row_start_is_abs,
316
- },
317
- "AST_column": {
318
- "column": ref.col_start if ref.col_start_is_abs else ref.col_start - self.col,
319
- "absolute": ref.col_start_is_abs,
320
- },
321
- }
322
-
323
- def number_archive(self, token: "Token") -> ASTNodeArrayArchive.ASTNodeArchive:
324
- if float(token.value).is_integer():
325
- return {
326
- "AST_node_type": "NUMBER_NODE",
327
- "AST_number_node_number": int(float(token.value)),
328
- "AST_number_node_decimal_low": int(float(token.value)),
329
- "AST_number_node_decimal_high": 0x3040000000000000,
330
- }
331
-
332
- value = token.value
333
- exponent = (
334
- math.floor(math.log10(math.e) * math.log(abs(float(value))))
335
- if float(value) != 0.0
336
- else 0
337
- )
338
- if "E" in value:
339
- significand, exponent = value.split("E")
340
- else:
341
- significand = value
342
- exponent = 0
343
- num_dp = len(re.sub(r"0*$", "", str(significand).split(".")[1]))
344
- exponent = int(exponent) - num_dp
345
- decimal_low = int(float(significand) * 10**num_dp)
346
- decimal_high = ((DECIMAL128_BIAS * 2) + (2 * exponent)) << 48
347
-
348
- return {
349
- "AST_node_type": "NUMBER_NODE",
350
- "AST_number_node_number": float(value),
351
- "AST_number_node_decimal_low": decimal_low,
352
- "AST_number_node_decimal_high": decimal_high,
353
- }
354
-
355
- def text_archive(self, token: "Token") -> ASTNodeArrayArchive.ASTNodeArchive:
356
- # String literals from tokenizer include start and end quotes
357
- value = token.value[1:-1]
358
- # Numbers does not escape quotes in the AST
359
- value = value.replace('""', '"')
360
- return {
361
- "AST_node_type": "STRING_NODE",
362
- "AST_string_node_string": value,
363
- }
364
-
365
- def logical_archive(self, token: "Token") -> ASTNodeArrayArchive.ASTNodeArchive:
366
- return {
367
- "AST_node_type": "BOOLEAN_NODE",
368
- "AST_boolean_node_boolean": token.value.lower() == "true",
369
- }
370
-
371
- def error(self, token: "Token") -> ASTNodeArrayArchive.ASTNodeArchive:
372
- return {
373
- "AST_node_type": "BOOLEAN_NODE",
374
- "AST_boolean_node_boolean": token.value.lower() == "true",
375
- }
376
-
377
- @staticmethod
378
- def formula_tokens(formula_str: str):
379
- tok = Tokenizer(formula_str.translate(OPERATOR_MAP))
380
- return Formula.rpn_tokens(tok.items)
381
-
382
- @staticmethod
383
- def rpn_tokens(tokens):
384
- output = []
385
- operators = []
386
-
387
- for token in tokens:
388
- if token.type in ["OPERAND", "NUMBER", "LITERAL", "TEXT", "RANGE"]:
389
- output.append(token)
390
- if operators and operators[-1].type == "FUNC":
391
- operators[-1].num_args += 1
392
- elif token.type == "FUNC" and token.subtype == "OPEN":
393
- token.value = token.value[0:-1]
394
- operators.append(token)
395
- operators[-1].num_args = 0
396
- elif token.type in ["OPERATOR-POSTFIX", "OPERATOR-PREFIX"]:
397
- output.append(token)
398
- elif token.type == "OPERATOR-INFIX":
399
- while (
400
- operators
401
- and operators[-1].type == "OPERATOR-INFIX"
402
- and OPERATOR_PRECEDENCE[operators[-1].value] >= OPERATOR_PRECEDENCE[token.value]
403
- ):
404
- output.append(operators.pop())
405
- operators.append(token)
406
- elif token.type == "FUNC" and token.subtype == "CLOSE":
407
- while operators and (
408
- operators[-1].type != "FUNC" and operators[-1].subtype != "OPEN"
409
- ):
410
- output.append(operators.pop())
411
- output.append(operators.pop())
412
- elif token.type == "SEP":
413
- if operators and operators[-1].type != "FUNC":
414
- output.append(operators.pop())
415
- # Only remaining token type is PAREN
416
- elif token.subtype == "OPEN":
417
- operators.append(token)
418
- else:
419
- # Must be a CLOSE PAREN
420
- while operators and operators[-1].subtype != "OPEN":
421
- output.append(operators.pop())
422
- operators.pop()
423
- # if operators and operators[-1].type == "FUNC":
424
- # output.append(operators.pop())
425
-
426
- while operators:
427
- output.append(operators.pop())
428
-
429
- return output
430
-
431
20
  def __str__(self) -> str:
432
21
  return "".join(reversed([str(x) for x in self._stack]))
433
22
 
@@ -452,12 +41,12 @@ class Formula(list):
452
41
  num_rows = node.AST_array_node_numRow
453
42
  num_cols = node.AST_array_node_numCol
454
43
  if num_rows == 1:
455
- # 1-dimentional array: {a,b,c,d}
44
+ # 1-dimensional array: {a,b,c,d}
456
45
  args = self.popn(num_cols)
457
46
  args = ",".join(reversed(args))
458
47
  self.push(f"{{{args}}}")
459
48
  else:
460
- # 2-dimentional array: {a,b;c,d}
49
+ # 2-dimensional array: {a,b;c,d}
461
50
  rows = []
462
51
  for _row_num in range(num_rows):
463
52
  args = self.popn(num_cols)