PyPI - numbers-parser - Versions diffs - 3.9.7__tar.gz → 3.10.1__tar.gz - Mend

numbers-parser 3.9.7tar.gz → 3.10.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: numbers-parser
-Version: 3.9.7
+Version: 3.10.1
 Summary: Read and write Apple Numbers spreadsheets
 Home-page: https://github.com/masaccio/numbers-parser
 License: MIT
@@ -200,6 +200,15 @@ else:
 Bulleted and numbered data can also be extracted with the bullet or number characters present in the text for each line in the cell in the same way as above but using the `formatted_bullets` property. A single space is inserted between the bullet character and the text string and in the case of bullets, this will be the Unicode character seen in Numbers, for example `"• some text"`.
+### Hyperlinks
+Numbers does not support hyperlinks to cells within a spreadsheet, but does allow embedding links in cells. When cells contain hyperlinks, `numbers_parser` returns the text version of the cell. The `hyperlinks` property of such cells is a list of text and URL tuples; it is populated whether or not `is_bulleted` is `True`:
+``` python
+cell = table.cell(0, 0)
+(text, url) = cell.hyperlinks[0]
+```
 ###  Cell images
 Querying cell formats is currently limited to image backgrounds only. If a cell has no background image, `None` is returned for all calls.
@@ -351,7 +360,7 @@ The default protobuf package installation may not include the C++ optimised vers
  To include the C++ support, download a released version of Google protobuf [from github](https://github.com/protocolbuffers/protobuf). Build instructions are described in [`src/README.md`](https://github.com/protocolbuffers/protobuf/blob/main/src/README). These have changed greatly over time, but as of April 2023, this was useful:
-```
+``` shell
 bazel build :protoc :protobuf
 cmake . -DCMAKE_CXX_STANDARD=14
 cmake --build . --parallel 8

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/README.md RENAMED Viewed

@@ -172,6 +172,15 @@ else:
 Bulleted and numbered data can also be extracted with the bullet or number characters present in the text for each line in the cell in the same way as above but using the `formatted_bullets` property. A single space is inserted between the bullet character and the text string and in the case of bullets, this will be the Unicode character seen in Numbers, for example `"• some text"`.
+### Hyperlinks
+Numbers does not support hyperlinks to cells within a spreadsheet, but does allow embedding links in cells. When cells contain hyperlinks, `numbers_parser` returns the text version of the cell. The `hyperlinks` property of such cells is a list of text and URL tuples; it is populated whether or not `is_bulleted` is `True`:
+``` python
+cell = table.cell(0, 0)
+(text, url) = cell.hyperlinks[0]
+```
 ###  Cell images
 Querying cell formats is currently limited to image backgrounds only. If a cell has no background image, `None` is returned for all calls.
@@ -323,7 +332,7 @@ The default protobuf package installation may not include the C++ optimised vers
  To include the C++ support, download a released version of Google protobuf [from github](https://github.com/protocolbuffers/protobuf). Build instructions are described in [`src/README.md`](https://github.com/protocolbuffers/protobuf/blob/main/src/README). These have changed greatly over time, but as of April 2023, this was useful:
-```
+``` shell
 bazel build :protoc :protobuf
 cmake . -DCMAKE_CXX_STANDARD=14
 cmake --build . --parallel 8

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/pyproject.toml RENAMED Viewed

@@ -12,7 +12,7 @@ name = "numbers-parser"
 packages = [{include = "numbers_parser", from = "src"}]
 readme = "README.md"
 repository = "https://github.com/masaccio/numbers-parser"
-version = "3.9.7"
+version = "3.10.1"
 [tool.poetry.scripts]
 cat-numbers = "numbers_parser._cat_numbers:main"
@@ -50,8 +50,7 @@ omit = ["src/numbers_parser/generated/*.py"]
 directory = "coverage_html_report"
 [tool.pytest.ini_options]
-addopts = "-ra -s"
-minversion = 6.0
+addopts = "--cov=src/numbers_parser --cov-report=term-missing"
 [tool.tox]
 legacy_tox_ini = """

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/_cat_numbers.py RENAMED Viewed

@@ -1,11 +1,15 @@
 import argparse
 import csv
+import logging
 import sys
 from numbers_parser import Document, _get_version
+from numbers_parser import __name__ as numbers_parser_name
 from numbers_parser.exceptions import FileFormatError
 from numbers_parser.cell import ErrorCell
+logger = logging.getLogger(numbers_parser_name)
 def command_line_parser():
     parser = argparse.ArgumentParser(
@@ -49,6 +53,9 @@ def command_line_parser():
         "-t", "--table", action="append", help="Names of table(s) to include in export"
     )
     parser.add_argument("document", nargs="*", help="Document(s) to export")
+    parser.add_argument(
+        "--debug", default=False, action="store_true", help="Enable debug logging"
+    )
     return parser
@@ -100,6 +107,13 @@ def main():
     elif len(args.document) == 0:
         parser.print_help()
     else:
+        hdlr = logging.StreamHandler()
+        hdlr.setFormatter(logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
+        logger.addHandler(hdlr)
+        if args.debug:
+            logger.setLevel("DEBUG")
+        else:
+            logger.setLevel("ERROR")
         for filename in args.document:
             try:
                 if args.list_sheets:

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/_unpack_numbers.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import os
 import json
+import logging
 import regex
 import sys
@@ -9,14 +10,17 @@ from base64 import b64decode
 from binascii import hexlify
 from compact_json import Formatter
 from numbers_parser.file import read_numbers_file
 from numbers_parser import _get_version
+from numbers_parser import __name__ as numbers_parser_name
 from numbers_parser.iwafile import IWAFile
 from numbers_parser.exceptions import FileFormatError, UnsupportedError, FileError
 from numbers_parser.numbers_uuid import NumbersUUID
+logger = logging.getLogger(numbers_parser_name)
 def ensure_directory_exists(prefix, path):
     """Ensure that a path's directory exists."""
     parts = os.path.split(path)
@@ -113,6 +117,9 @@ def main():
         "--pretty", action="store_true", help="Enable all prettifying options"
     )
     parser.add_argument("--output", "-o", help="directory name to unpack into")
+    parser.add_argument(
+        "--debug", default=False, action="store_true", help="Enable debug logging"
+    )
     args = parser.parse_args()
     if args.version:
         print(_get_version())
@@ -125,6 +132,13 @@ def main():
     elif len(args.document) == 0:
         parser.print_help()
     else:
+        hdlr = logging.StreamHandler()
+        hdlr.setFormatter(logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
+        logger.addHandler(hdlr)
+        if args.debug:
+            logger.setLevel("DEBUG")
+        else:
+            logger.setLevel("ERROR")
         for document in args.document:
             output_dir = args.output or document.replace(".numbers", "")
             try:

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/cell.py RENAMED Viewed

@@ -6,6 +6,7 @@ from numbers_parser.cell_storage import CellType, CellStorage
 from pendulum import duration, Duration, DateTime
 from functools import lru_cache
+from typing import List, Tuple
 class Cell:
@@ -48,8 +49,8 @@ class Cell:
             cell = DurationCell(*row_col, value)
         elif cell_storage.type == CellType.ERROR:
             cell = ErrorCell(*row_col)
-        elif cell_storage.type == CellType.BULLET:
-            cell = BulletedTextCell(*row_col, cell_storage.value)
+        elif cell_storage.type == CellType.RICH_TEXT:
+            cell = RichTextCell(*row_col, cell_storage.value)
         else:
             raise UnsupportedError(  # pragma: no cover
                 f"Unsupport cell type {cell_storage.type} "
@@ -140,18 +141,20 @@ class TextCell(Cell):
         return self._value
-class BulletedTextCell(Cell):
+class RichTextCell(Cell):
     def __init__(self, row_num: int, col_num: int, value):
         self._type = TSTArchives.automaticCellType
         super().__init__(row_num, col_num, value["text"])
         self._bullets = value["bullets"]
-        self._formatted_bullets = [
-            value["bullet_chars"][i] + " " + value["bullets"][i]
-            if value["bullet_chars"][i] is not None
-            else value["bullets"][i]
-            for i in range(len(self._bullets))
-        ]
-        self.is_bulleted = True
+        self._hyperlinks = value["hyperlinks"]
+        if value["bulleted"]:
+            self._formatted_bullets = [
+                value["bullet_chars"][i] + " " + value["bullets"][i]
+                if value["bullet_chars"][i] is not None
+                else value["bullets"][i]
+                for i in range(len(self._bullets))
+            ]
+            self.is_bulleted = True
     @property
     def value(self) -> str:
@@ -165,6 +168,15 @@ class BulletedTextCell(Cell):
     def formatted_bullets(self) -> str:
         return self._formatted_bullets
+    @property
+    def hyperlinks(self) -> List[Tuple]:
+        return self._hyperlinks
+# Backwards compatibility to earlier class names
+class BulletedTextCell(RichTextCell):
+    pass
 class EmptyCell(Cell):
     def __init__(self, row_num: int, col_num: int):

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/cell_storage.py RENAMED Viewed

@@ -94,32 +94,6 @@ CELL_STORAGE_MAP_V5 = OrderedDict(
     ]
 )
-# CELL_STORAGE_MAP_V4 = OrderedDict(
-#     [
-#         (0x2, {"attr": "cell_style_id"}),
-#         (0x80, {"attr": "text_style_id"}),  # SheetJS skips
-#         (0x400, {"attr": "conditional_style_id"}),  # SheetJS skips
-#         (0x800, {"attr": "conditional_style_rule_id"}),  # SheetJS skips
-#         (0x4, {"attr": "current_format_id"}),  # SheetJS skips
-#         (0x8, {"attr": "formula_id"}),  # SheetJS skips
-#         (0x100, {"attr": "formula_error_id"}),  # SheetJS skips
-#         (0x200, {"attr": "rich_id"}),
-#         (0x1000, {"attr": "comment_id"}),  # SheetJS skips
-#         (0x2000, {"attr": "import_warning_id"}),  # SheetJS skips
-#         (0x10, {"attr": "string_id"}),
-#         (0x20, {"attr": "double", "size": 8}),
-#         (0x40, {"attr": "seconds", "size": 8}),
-#         (0x10000, {"attr": "num_format_id"}),  # SheetJS skips
-#         (0x80000, {"attr": "currency_format_id"}),  # SheetJS skips
-#         (0x20000, {"attr": "date_format_id"}),  # SheetJS skips
-#         (0x40000, {"attr": "duration_format_id"}),  # SheetJS skips
-#         (0x100000, {"attr": "control_format_id"}),  # SheetJS skips
-#         (0x200000, {"attr": "custom_format_id"}),  # SheetJS skips
-#         (0x400000, {"attr": "base_format_id"}),  # SheetJS skips
-#         (0x800000, {"attr": "multiple_choice_id"}),  # SheetJS skips
-#     ]
-# )
 class CellStorage:
     def __init__(  # noqa: C901
@@ -178,8 +152,8 @@ class CellStorage:
             self.value = None
             self.type = CellType.ERROR
         elif cell_type == TSTArchives.automaticCellType:
-            self.value = self.model.table_bullets(self.table_id, self.rich_id)
-            self.type = CellType.BULLET
+            self.value = self.model.table_rich_text(self.table_id, self.rich_id)
+            self.type = CellType.RICH_TEXT
         elif cell_type == 10:
             self.value = self.d128
             self.type = CellType.NUMBER

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/constants.py RENAMED Viewed

@@ -43,7 +43,7 @@ class CellType(IntEnum):
     BOOL = 5
     DURATION = 6
     ERROR = 7
-    BULLET = 8
+    RICH_TEXT = 8
 class CellPadding(IntEnum):

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/file.py RENAMED Viewed

@@ -1,3 +1,5 @@
+import logging
 from io import BytesIO
 from zipfile import ZipFile, BadZipFile
@@ -6,8 +8,12 @@ from numbers_parser.exceptions import FileError, FileFormatError
 import os
+logger = logging.getLogger(__name__)
+debug = logger.debug
 def read_numbers_file(path, file_handler=None, object_handler=None):
+    debug("read_numbers_file: path=%s", path)
     if os.path.isdir(path):
         if os.path.isfile(os.path.join(path, "Index.zip")):
             get_objects_from_zip_file(
@@ -80,6 +86,7 @@ def extract_iwa_archives(blob, filename, file_handler, object_handler):
         return
     try:
+        debug("extract_iwa_archives: filename=%s", filename)
         iwaf = IWAFile.from_buffer(blob, filename)
     except Exception as e:  # pragma: no cover
         raise FileFormatError(f"{filename}: invalid IWA file {filename}") from e

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/iwafile.py RENAMED Viewed

@@ -1,5 +1,6 @@
 # Forked from https://github.com/psobot/keynote-parser/blob/master/keynote_parser/codec.py
+import logging
 import struct
 import snappy
@@ -15,6 +16,9 @@ from google.protobuf.internal.decoder import _DecodeVarint32
 from google.protobuf.json_format import MessageToDict, ParseDict
 from google.protobuf.message import EncodeError
+logger = logging.getLogger(__name__)
+debug = logger.debug
 class IWAFile(object):
     def __init__(self, chunks, filename=None):
@@ -26,6 +30,7 @@ class IWAFile(object):
         try:
             chunks = []
             while data:
+                debug("from_buffer: filename=%s len=%d", filename, len(data))
                 chunk, data = IWACompressedChunk.from_buffer(data, filename)
                 chunks.append(chunk)
@@ -88,6 +93,7 @@ class IWACompressedChunk(object):
         data = b"".join(cls._decompress_all(data))
         archives = []
         while data:
+            debug("from_buffer: filename=%s len=%d", filename, len(data))
             archive, data = IWAArchiveSegment.from_buffer(data, filename)
             archives.append(archive)
         return cls(archives), None

{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/model.py RENAMED Viewed

@@ -47,6 +47,7 @@ from numbers_parser.generated import TSPMessages_pb2 as TSPMessages
 from numbers_parser.generated import TSPArchiveMessages_pb2 as TSPArchiveMessages
 from numbers_parser.generated import TSTArchives_pb2 as TSTArchives
 from numbers_parser.generated import TSCEArchives_pb2 as TSCEArchives
+from numbers_parser.generated import TSWPArchives_pb2 as TSWPArchives
 class DataLists:
@@ -182,9 +183,12 @@ class _NumbersModel:
         #  },
         row_bucket_map = {i: None for i in range(self.objects[table_id].number_of_rows)}
         bds = self.objects[table_id].base_data_store
-        buckets = self.objects[bds.rowHeaders.buckets[0].identifier].headers
-        for i, bucket in enumerate(buckets):
-            row_bucket_map[bucket.index] = i
+        bucket_ids = [x.identifier for x in bds.rowHeaders.buckets]
+        idx = 0
+        for bucket_id in bucket_ids:
+            for header in self.objects[bucket_id].headers:
+                row_bucket_map[header.index] = idx
+                idx += 1
         return row_bucket_map
     def number_of_rows(self, table_id, num_rows=None):
@@ -1144,10 +1148,9 @@ class _NumbersModel:
         return cell
     @lru_cache(maxsize=None)
-    def table_bullets(self, table_id: int, string_key: int) -> Dict:
+    def table_rich_text(self, table_id: int, string_key: int) -> Dict:
         """
-        Extract bullets from a rich text data cell.
-        Returns None if the cell is not rich text
+        Extract bullets and hyperlinks from a rich text data cell.
         """
         # The table model base data store contains a richTextTable field
         # which is a reference to a TST.TableDataList. The TableDataList
@@ -1186,13 +1189,27 @@ class _NumbersModel:
             if string_key == entry.key:
                 payload = self.objects[entry.rich_text_payload.identifier]
                 payload_storage = self.objects[payload.storage.identifier]
-                payload_entries = payload_storage.table_para_style.entries
-                table_list_styles = payload_storage.table_list_style.entries
-                offsets = [e.character_index for e in payload_entries]
+                smartfield_entries = payload_storage.table_smartfield.entries
                 cell_text = payload_storage.text[0]
+                hyperlinks = []
+                for i, e in enumerate(smartfield_entries):
+                    if e.object.identifier:
+                        obj = self.objects[e.object.identifier]
+                        if type(obj) == TSWPArchives.HyperlinkFieldArchive:
+                            start = e.character_index
+                            if i < len(smartfield_entries) - 1:
+                                end = smartfield_entries[i + 1].character_index
+                            else:
+                                end = len(cell_text)
+                            url_text = cell_text[start:end]
+                            hyperlinks.append((url_text, obj.url_ref))
                 bullets = []
                 bullet_chars = []
+                payload_entries = payload_storage.table_para_style.entries
+                table_list_styles = payload_storage.table_list_style.entries
+                offsets = [e.character_index for e in payload_entries]
                 for i, offset in enumerate(offsets):
                     if i == len(offsets) - 1:
                         bullets.append(cell_text[offset:])
@@ -1211,15 +1228,18 @@ class _NumbersModel:
                         number_type = bullet_style.number_types[0]
                         bullet_char = formatted_number(number_type, i)
                     else:
-                        bullet_char = ""
+                        bullet_char = None
                     bullet_chars.append(bullet_char)
                 return {
                     "text": cell_text,
+                    "bulleted": any([c is not None for c in bullet_chars]),
                     "bullets": bullets,
                     "bullet_chars": bullet_chars,
+                    "hyperlinks": hyperlinks,
                 }
         return None