numbers-parser 4.17.0.post1__py3-none-any.whl → 4.18.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numbers_parser/model.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import re
4
4
  from array import array
5
5
  from collections import defaultdict
6
+ from datetime import timedelta
6
7
  from hashlib import sha1
7
8
  from itertools import chain
8
9
  from math import floor
@@ -32,6 +33,7 @@ from numbers_parser.cell import (
32
33
  PaddingType,
33
34
  Style,
34
35
  VerticalJustification,
36
+ _decode_date_format,
35
37
  )
36
38
  from numbers_parser.constants import (
37
39
  ALLOWED_FORMATTING_PARAMETERS,
@@ -47,6 +49,7 @@ from numbers_parser.constants import (
47
49
  DEFAULT_TEXT_WRAP,
48
50
  DEFAULT_TILE_SIZE,
49
51
  DOCUMENT_ID,
52
+ EPOCH,
50
53
  FORMAT_TYPE_MAP,
51
54
  MAX_TILE_SIZE,
52
55
  PACKAGE_ID,
@@ -233,6 +236,8 @@ class _NumbersModel(Cacheable):
233
236
  self._control_specs = DataLists(self, "control_cell_spec_table", "cell_spec")
234
237
  self._formulas = DataLists(self, "formula_table", "formula")
235
238
  self._table_data = {}
239
+ self._table_categories_data = {}
240
+ self._table_categories_row_mapper = {}
236
241
  self._styles = None
237
242
  self._images = {}
238
243
  self._custom_formats = None
@@ -807,38 +812,6 @@ class _NumbersModel(Cacheable):
807
812
  # Table can be empty if the document does not use FormulaOwnerDependenciesArchive
808
813
  return self._table_id_to_base_id.get(table_id)
809
814
 
810
- def get_formula_owner(self, table_id: int) -> object:
811
- table_uuid = self.table_base_id(table_id)
812
- return self.objects[self._table_base_id_to_formula_owner_id[table_uuid]]
813
-
814
- def add_formula_dependency(self, row: int, col: int, table_id: int) -> None:
815
- calc_engine = self.calc_engine()
816
- calc_engine.dependency_tracker.number_of_formulas += 1
817
- internal_formula_id = calc_engine.dependency_tracker.number_of_formulas
818
-
819
- formula_owner = self.get_formula_owner(table_id)
820
- formula_owner.cell_dependencies.cell_record.append(
821
- TSCEArchives.CellRecordExpandedArchive(column=col, row=row),
822
- )
823
- if len(formula_owner.tiled_cell_dependencies.cell_record_tiles) == 0:
824
- cell_record_id, cell_record = self.objects.create_object_from_dict(
825
- "CalculationEngine",
826
- {
827
- "internal_owner_id": internal_formula_id,
828
- "tile_column_begin": 0,
829
- "tile_row_begin": 0,
830
- },
831
- TSCEArchives.CellRecordTileArchive,
832
- )
833
- formula_owner.tiled_cell_dependencies.cell_record_tiles.append(
834
- TSPMessages.Reference(identifier=cell_record_id),
835
- )
836
- else:
837
- cell_record_id = formula_owner.tiled_cell_dependencies.cell_record_tiles[0].identifier
838
- cell_record = self.objects[cell_record_id]
839
-
840
- cell_record.cell_records.append(formula_owner.cell_dependencies.cell_record[-1])
841
-
842
815
  @cache(num_args=0)
843
816
  def calc_engine_id(self):
844
817
  """Return the CalculationEngine ID for the current document."""
@@ -867,7 +840,7 @@ class _NumbersModel(Cacheable):
867
840
  self._merge_cells[table_id].add_anchor(row_start, col_start, size)
868
841
 
869
842
  @cache()
870
- def calculate_new_merge_cell_ranges(self, table_id) -> None:
843
+ def calculate_merges_using_formula_stores(self, table_id) -> None:
871
844
  table_model = self.objects[table_id]
872
845
  formulas = table_model.merge_owner.formula_store.formulas
873
846
  if len(formulas) == 0:
@@ -887,7 +860,7 @@ class _NumbersModel(Cacheable):
887
860
  )
888
861
 
889
862
  @cache()
890
- def calculate_merge_cell_ranges(self, table_id) -> None:
863
+ def calculate_merges_using_dependency_archives(self, table_id) -> None:
891
864
  """Extract all the merge cell ranges for the Table."""
892
865
  # See details in Numbers.md#merge-ranges.
893
866
  owner_id_map = self.owner_id_map()
@@ -910,6 +883,8 @@ class _NumbersModel(Cacheable):
910
883
  record_range.bottom_right_column,
911
884
  )
912
885
 
886
+ @cache()
887
+ def calculate_merges_using_region_map(self, table_id) -> None:
913
888
  base_data_store = self.objects[table_id].base_data_store
914
889
  if base_data_store.merge_region_map.identifier == 0:
915
890
  return
@@ -926,18 +901,12 @@ class _NumbersModel(Cacheable):
926
901
  )
927
902
  row_end = row_start + num_rows - 1
928
903
  col_end = col_start + num_columns - 1
929
- for row in range(row_start, row_end + 1):
930
- for col in range(col_start, col_end + 1):
931
- self._merge_cells[table_id].add_reference(
932
- row,
933
- col,
934
- (row_start, col_start, row_end, col_end),
935
- )
936
- self._merge_cells[table_id].add_anchor(row_start, col_start, (num_rows, num_columns))
904
+ self.add_merge_range(table_id, row_start, row_end, col_start, col_end)
937
905
 
938
906
  def merge_cells(self, table_id):
939
- self.calculate_new_merge_cell_ranges(table_id)
940
- self.calculate_merge_cell_ranges(table_id)
907
+ self.calculate_merges_using_formula_stores(table_id)
908
+ self.calculate_merges_using_dependency_archives(table_id)
909
+ self.calculate_merges_using_region_map(table_id)
941
910
  return self._merge_cells[table_id]
942
911
 
943
912
  def table_id_to_sheet_id(self, table_id: int) -> int:
@@ -946,25 +915,14 @@ class _NumbersModel(Cacheable):
946
915
  return sheet_id
947
916
  return None
948
917
 
949
- def table_name_to_uuid(self, sheet_name: str, table_name: str) -> str:
950
- table_ids = [tid for tid in self.table_ids() if table_name == self.table_name(tid)]
951
- if len(table_ids) == 1:
952
- return self.table_base_id(table_ids[0])
953
-
954
- sheet_name_to_id = {self.sheet_name(x): x for x in self.sheet_ids()}
955
- sheet_id = sheet_name_to_id[sheet_name]
956
- table_name_to_id = {self.table_name(x): x for x in self.table_ids(sheet_id)}
957
- table_id = table_name_to_id[table_name]
958
- return self.table_base_id(table_id)
959
-
960
918
  @cache()
961
919
  def table_uuids_to_id(self, table_uuid) -> int | None:
962
- for sheet_id in self.sheet_ids(): # pragma: no branch
920
+ for sheet_id in self.sheet_ids(): # pragma: no branch # noqa: RET503
963
921
  for table_id in self.table_ids(sheet_id):
964
922
  if table_uuid == self.table_base_id(table_id):
965
923
  return table_id
966
924
 
967
- def node_to_ref(self, table_id: int, row: int, col: int, node, merge_mode: bool = False):
925
+ def node_to_ref(self, table_id: int, row: int, col: int, node):
968
926
  def resolve_range(is_absolute, absolute_list, relative_list, offset, max_val):
969
927
  if is_absolute:
970
928
  return absolute_list[0].range_begin
@@ -1030,7 +988,6 @@ class _NumbersModel(Cacheable):
1030
988
  col_end_is_abs=node.AST_sticky_bits.end_column_is_absolute,
1031
989
  from_table_id=table_id,
1032
990
  to_table_id=to_table_id,
1033
- _do_init=not merge_mode,
1034
991
  )
1035
992
 
1036
993
  row = node.AST_row.row if node.AST_row.absolute else row + node.AST_row.row
@@ -1510,7 +1467,7 @@ class _NumbersModel(Cacheable):
1510
1467
  },
1511
1468
  TSTArchives.TableModelArchive,
1512
1469
  )
1513
- # Supresses Numbers assertions for tables sharing the same data
1470
+ # Suppress Numbers assertions for tables sharing the same data
1514
1471
  table_model.category_owner.identifier = 0
1515
1472
 
1516
1473
  column_headers_id, column_headers = self.objects.create_object_from_dict(
@@ -1719,7 +1676,7 @@ class _NumbersModel(Cacheable):
1719
1676
  "bottom_right_column": 0x7FFF,
1720
1677
  "bottom_right_row": 0x7FFFFFFF,
1721
1678
  }
1722
- spanning_depdendencies = {
1679
+ spanning_dependencies = {
1723
1680
  "total_range_for_table": null_range_ref,
1724
1681
  "body_range_for_table": null_range_ref,
1725
1682
  }
@@ -1732,8 +1689,8 @@ class _NumbersModel(Cacheable):
1732
1689
  "cell_dependencies": {},
1733
1690
  "range_dependencies": {},
1734
1691
  "volatile_dependencies": volatile_dependencies,
1735
- "spanning_column_dependencies": spanning_depdendencies,
1736
- "spanning_row_dependencies": spanning_depdendencies,
1692
+ "spanning_column_dependencies": spanning_dependencies,
1693
+ "spanning_row_dependencies": spanning_dependencies,
1737
1694
  "whole_owner_dependencies": {"dependent_cells": {}},
1738
1695
  "cell_errors": {},
1739
1696
  "base_owner_uid": base_owner_uuid.dict2,
@@ -1809,7 +1766,7 @@ class _NumbersModel(Cacheable):
1809
1766
  for k, v in presets_map.items()
1810
1767
  }
1811
1768
  for style in styles.values():
1812
- # Override __setattr__ behaviour for builtin styles
1769
+ # Override __setattr__ behavior for builtin styles
1813
1770
  style.__dict__["_update_text_style"] = False
1814
1771
  style.__dict__["_update_cell_style"] = False
1815
1772
  return styles
@@ -2142,7 +2099,7 @@ class _NumbersModel(Cacheable):
2142
2099
  # a string with a new bullet character
2143
2100
  bds = self.objects[table_id].base_data_store
2144
2101
  rich_text_table = self.objects[bds.rich_text_table.identifier]
2145
- for entry in rich_text_table.entries: # pragma: no branch
2102
+ for entry in rich_text_table.entries: # pragma: no branch # noqa: RET503
2146
2103
  if string_key == entry.key:
2147
2104
  payload = self.objects[entry.rich_text_payload.identifier]
2148
2105
  payload_storage = self.objects[payload.storage.identifier]
@@ -2596,107 +2553,160 @@ class _NumbersModel(Cacheable):
2596
2553
  # datas never appears to be an empty list (default themes include images)
2597
2554
  return max(image_ids) + 1
2598
2555
 
2599
- def table_category_data(self, table_id: int) -> dict | None:
2556
+ @classmethod
2557
+ def cell_value_to_key(
2558
+ cls,
2559
+ cell_value: TSCEArchives.CellValueArchive,
2560
+ ) -> str | int | bool | datetime:
2561
+ """Convert a CellValueArchive to a key."""
2562
+ cell_value_type = cell_value.cell_value_type
2563
+ if cell_value_type == CellValueType.STRING_TYPE:
2564
+ return cell_value.string_value.value
2565
+ if cell_value_type == CellValueType.NUMBER_TYPE:
2566
+ return cell_value.number_value.value
2567
+ if cell_value_type == CellValueType.BOOLEAN_TYPE:
2568
+ return cell_value.boolean_value.value
2569
+ if cell_value_type == CellValueType.DATE_TYPE:
2570
+ # "yyyy"
2571
+ # "yyyy-QQQ"
2572
+ # "LLLL yyyy"
2573
+ # "yyyy'-W'w"
2574
+ # "d/M/yyyy"
2575
+ # "EEEE"
2576
+ return _decode_date_format(
2577
+ cell_value.date_value.format.date_time_format,
2578
+ EPOCH + timedelta(seconds=cell_value.date_value.value),
2579
+ )
2580
+ return None
2581
+
2582
+ @cache(num_args=0)
2583
+ def group_uuid_values(self):
2584
+ return {
2585
+ NumbersUUID(self.objects[_id].group_uid): _NumbersModel.cell_value_to_key(
2586
+ self.objects[_id].group_cell_value,
2587
+ )
2588
+ for _id in self.find_refs("GroupNodeArchive")
2589
+ }
2590
+
2591
+ @cache()
2592
+ def calculate_table_categories(self, table_id: int) -> tuple[dict[int, int], dict] | None:
2600
2593
  category_owner_id = self.objects[table_id].category_owner.identifier
2594
+ if not category_owner_id:
2595
+ self._table_categories_data[table_id] = None
2596
+ self._table_categories_row_mapper[table_id] = None
2597
+ return
2598
+
2601
2599
  category_archive_id = self.objects[category_owner_id].group_by[0].identifier
2602
2600
  category_archive = self.objects[category_archive_id]
2603
2601
  if not category_archive.is_enabled:
2604
- return None
2602
+ self._table_categories_data[table_id] = None
2603
+ self._table_categories_row_mapper[table_id] = None
2604
+ return
2605
2605
 
2606
2606
  table_info = self.objects[self.table_info_id(table_id)]
2607
2607
  category_order = self.objects[table_info.category_order.identifier]
2608
2608
  row_uid_map = self.objects[category_order.uid_map.identifier]
2609
- sorted_row_uuids = [
2610
- NumbersUUID(row_uid_map.sorted_row_uids[i]).hex for i in row_uid_map.row_uid_for_index
2611
- ]
2612
2609
 
2613
- data = self._table_data[table_id]
2614
- header = [cell.value for cell in data[0]]
2615
-
2616
- def index_set_to_offsets(index_set: TSCEArchives.IndexSetArchive) -> list[int]:
2617
- """Convert an IndexSetArchive to a list of offsets."""
2618
- offsets = []
2619
- for entry in index_set.entries:
2620
- if entry.HasField("range_end"):
2621
- offsets += list(range(entry.range_begin, entry.range_end + 1))
2622
- else:
2623
- offsets += list(range(entry.range_begin, entry.range_begin + 1))
2624
- return offsets
2625
-
2626
- def cell_value_to_key(
2627
- cell_value: TSCEArchives.CellValueArchive,
2628
- ) -> str | int | bool | datetime:
2629
- """Convert a CellValueArchive to a key."""
2630
- cell_value_type = cell_value.cell_value_type
2631
- if cell_value_type == CellValueType.STRING_TYPE:
2632
- return cell_value.string_value.value
2633
- if cell_value_type == CellValueType.NUMBER_TYPE:
2634
- return cell_value.number_value.value
2635
- if cell_value_type == CellValueType.BOOLEAN_TYPE:
2636
- return cell_value.boolean_value.value
2637
- # Must be DATE_TYPE
2638
- return cell_value.date_value.value
2639
-
2640
- group_node_to_key = {
2641
- NumbersUUID(self.objects[_id].group_uid).hex: cell_value_to_key(
2642
- self.objects[_id].group_cell_value,
2643
- )
2644
- for _id in self.find_refs("GroupNodeArchive")
2610
+ group_uuids = self.group_uuid_values()
2611
+ row_uuid_to_offset = {
2612
+ NumbersUUID(uuid): row for row, uuid in enumerate(category_archive.row_uid_lookup.uuids)
2645
2613
  }
2646
- group_uuids = [NumbersUUID(x.group_uid).hex for x in category_archive.group_node_root.child]
2647
- group_uuids = [uuid for uuid in sorted_row_uuids if uuid in group_uuids]
2614
+ row_uid_for_index = [
2615
+ NumbersUUID(row_uid_map.sorted_row_uids[i]) for i in row_uid_map.row_uid_for_index
2616
+ ]
2648
2617
 
2649
- def group_hierarchy(parent: str, children: list):
2650
- nodes = {}
2618
+ def parent_relationships(parent: NumbersUUID, children: list, group_parents: dict):
2651
2619
  for child in children:
2652
- group_uuid = NumbersUUID(child.group_uid).hex
2620
+ child_uuid = NumbersUUID(child.group_uid)
2621
+ group_parents[child_uuid] = parent
2653
2622
  if len(child.child) > 0:
2654
- nodes[group_uuid] = group_hierarchy(group_uuid, child.child)
2655
- else:
2656
- nodes[group_uuid] = None
2657
- return nodes
2658
-
2659
- def assign_rows_to_categories(parent: str, children: list, categories: dict):
2660
- for child in children:
2661
- group_uuid = NumbersUUID(child.group_uid).hex
2662
- if len(child.child) == 0:
2663
- key = cell_value_to_key(child.group_cell_value)
2664
-
2665
- row_offsets = index_set_to_offsets(child.row_lookup_uids)
2666
- categories[group_uuid] = {
2667
- "key": key,
2668
- "parent": parent,
2669
- "rows": [
2670
- {header[col]: cell.value for col, cell in enumerate(data[row])}
2671
- for row in row_offsets
2672
- ],
2623
+ parent_relationships(child_uuid, child.child, group_parents)
2624
+
2625
+ group_parents = {}
2626
+ parent_relationships(None, category_archive.group_node_root.child, group_parents)
2627
+
2628
+ row = 0
2629
+ row_mapper = {}
2630
+ header = []
2631
+ in_header = True
2632
+
2633
+ nodes: dict[NumbersUUID, dict] = {}
2634
+ root_children: dict = {}
2635
+ stack: list[NumbersUUID | None] = []
2636
+ # rows that are not in any group (rare) kept here
2637
+ root_rows: list = []
2638
+
2639
+ for uuid in row_uid_for_index:
2640
+ if uuid in group_uuids:
2641
+ # this UUID is a group heading
2642
+ in_header = False
2643
+ parent = group_parents.get(uuid)
2644
+
2645
+ # ensure node exists
2646
+ if uuid not in nodes:
2647
+ nodes[uuid] = {
2648
+ "key": group_uuids[uuid],
2649
+ "children": {},
2650
+ "rows": [],
2673
2651
  }
2674
- else:
2675
- categories[group_uuid] = {
2676
- "key": group_node_to_key[group_uuid],
2677
- "parent": parent,
2678
- "rows": None,
2679
- }
2680
- assign_rows_to_categories(group_uuid, child.child, categories)
2681
-
2682
- category_tree = group_hierarchy(
2683
- NumbersUUID(category_archive.group_node_root.group_uid).hex,
2684
- category_archive.group_node_root.child,
2685
- )
2686
2652
 
2687
- categories = {}
2688
- assign_rows_to_categories(None, category_archive.group_node_root.child, categories)
2689
-
2690
- def merge_trees(a: dict, b: dict):
2691
- new_tree = {}
2692
- for k, v in a.items():
2693
- if v is not None:
2694
- new_tree[b[k]["key"]] = merge_trees(v, b)
2653
+ # attach node to its parent (or root)
2654
+ if parent is None:
2655
+ if nodes[uuid]["key"] not in root_children:
2656
+ root_children[nodes[uuid]["key"]] = nodes[uuid]
2695
2657
  else:
2696
- new_tree[b[k]["key"]] = b[k]["rows"]
2697
- return new_tree
2698
-
2699
- return merge_trees(category_tree, categories)
2658
+ if parent not in nodes:
2659
+ nodes[parent] = {
2660
+ "key": group_uuids[parent],
2661
+ "children": {},
2662
+ "rows": [],
2663
+ }
2664
+ parent_node = nodes[parent]
2665
+ if nodes[uuid]["key"] not in parent_node["children"]:
2666
+ parent_node["children"][nodes[uuid]["key"]] = nodes[uuid]
2667
+
2668
+ # update stack to current nesting (pop until parent is on top)
2669
+ while stack and stack[-1] != parent:
2670
+ stack.pop()
2671
+ stack.append(uuid)
2672
+ else:
2673
+ mapped_row = row_uuid_to_offset[uuid]
2674
+ if in_header:
2675
+ header.append(self._table_data[table_id][mapped_row])
2676
+ # assign this row to the deepest open group, or root
2677
+ elif stack:
2678
+ nodes[stack[-1]]["rows"].append(self._table_data[table_id][mapped_row])
2679
+ else:
2680
+ root_rows.append(self._table_data[table_id][mapped_row])
2681
+
2682
+ row_mapper[row] = mapped_row
2683
+ row += 1
2684
+
2685
+ # helper to convert node dicts to nested mapping (keys -> children or rows)
2686
+ def node_to_structure(node: dict):
2687
+ if not node["children"]:
2688
+ return node["rows"]
2689
+ out = {}
2690
+ for child_key, child_node in node["children"].items():
2691
+ out[child_key] = node_to_structure(child_node)
2692
+ # if this node also has rows in addition to children, include them under a special key
2693
+ if node["rows"]:
2694
+ out["_rows"] = node["rows"]
2695
+ return out
2696
+
2697
+ maximally_nested = {}
2698
+ for key, node in root_children.items():
2699
+ maximally_nested[key] = node_to_structure(node)
2700
+ if root_rows:
2701
+ maximally_nested["_rows"] = root_rows
2702
+
2703
+ self._table_categories_data[table_id] = maximally_nested
2704
+ self._table_categories_row_mapper[table_id] = {
2705
+ row: row_uuid_to_offset[uuid]
2706
+ for row, uuid in enumerate(
2707
+ uuid for uuid in row_uid_for_index if uuid not in group_uuids
2708
+ )
2709
+ }
2700
2710
 
2701
2711
 
2702
2712
  def rgb(obj) -> RGB:
numbers_parser/xrefs.py CHANGED
@@ -67,8 +67,6 @@ class CellRange:
67
67
  _table_names: list[str] = field(init=False, default=None, repr=False)
68
68
 
69
69
  def __post_init__(self):
70
- if not self._do_init:
71
- return
72
70
  if self._table_names is None:
73
71
  self._initialize_table_data()
74
72
  self.model.name_ref_cache.refresh()