numbers-parser 3.9.7__tar.gz → 3.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/PKG-INFO +11 -2
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/README.md +10 -1
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/pyproject.toml +2 -3
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/_cat_numbers.py +14 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/_unpack_numbers.py +15 -1
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/cell.py +22 -10
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/cell_storage.py +2 -28
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/constants.py +1 -1
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/file.py +7 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/iwafile.py +6 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/model.py +31 -11
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/LICENSE.rst +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/__init__.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/bullets.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/containers.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/data/empty.numbers +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/document.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/exceptions.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/formula.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/functionmap.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNCommandArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNCommandArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSAArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSAArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSACommandArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCEArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCH3DArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_Common_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_GEN_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHCommandArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHPreUFFArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSDArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSDArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSDCommandArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSKArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSKArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSPArchiveMessages_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSPDatabaseMessages_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSPMessages_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSSArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSSArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTCommandArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTStylePropertyArchiving_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSWPArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSWPArchives_sos_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSWPCommandArchives_pb2.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/__init__.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/mapping.py +0 -0
- {numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/numbers_uuid.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: numbers-parser
|
|
3
|
-
Version: 3.9.7
|
|
3
|
+
Version: 3.10.1
|
|
4
4
|
Summary: Read and write Apple Numbers spreadsheets
|
|
5
5
|
Home-page: https://github.com/masaccio/numbers-parser
|
|
6
6
|
License: MIT
|
|
@@ -200,6 +200,15 @@ else:
|
|
|
200
200
|
|
|
201
201
|
Bulleted and numbered data can also be extracted with the bullet or number characters present in the text for each line in the cell in the same way as above but using the `formatted_bullets` property. A single space is inserted between the bullet character and the text string and in the case of bullets, this will be the Unicode character seen in Numbers, for example `"• some text"`.
|
|
202
202
|
|
|
203
|
+
### Hyperlinks
|
|
204
|
+
|
|
205
|
+
Numbers does not support hyperlinks to cells within a spreadsheet, but does allow embedding links in cells. When cells contain hyperlinks, `numbers_parser` returns the text version of the cell. The `hyperlinks` property of cells where `is_bulleted` is `True` is a list of text and URL tuples:
|
|
206
|
+
|
|
207
|
+
``` python
|
|
208
|
+
cell = table.cell(0, 0)
|
|
209
|
+
(text, url) = cell.hyperlinks[0]
|
|
210
|
+
```
|
|
211
|
+
|
|
203
212
|
### Cell images
|
|
204
213
|
|
|
205
214
|
Querying cell formats is currently limited to image backgrounds only. If a cell has no background image, `None` is returned for all calls.
|
|
@@ -351,7 +360,7 @@ The default protobuf package installation may not include the C++ optimised vers
|
|
|
351
360
|
|
|
352
361
|
To include the C++ support, download a released version of Google protobuf [from github](https://github.com/protocolbuffers/protobuf). Build instructions are described in [`src/README.md`](https://github.com/protocolbuffers/protobuf/blob/main/src/README). These have changed greatly over time, but as of April 2023, this was useful:
|
|
353
362
|
|
|
354
|
-
```
|
|
363
|
+
``` shell
|
|
355
364
|
bazel build :protoc :protobuf
|
|
356
365
|
cmake . -DCMAKE_CXX_STANDARD=14
|
|
357
366
|
cmake --build . --parallel 8
|
|
@@ -172,6 +172,15 @@ else:
|
|
|
172
172
|
|
|
173
173
|
Bulleted and numbered data can also be extracted with the bullet or number characters present in the text for each line in the cell in the same way as above but using the `formatted_bullets` property. A single space is inserted between the bullet character and the text string and in the case of bullets, this will be the Unicode character seen in Numbers, for example `"• some text"`.
|
|
174
174
|
|
|
175
|
+
### Hyperlinks
|
|
176
|
+
|
|
177
|
+
Numbers does not support hyperlinks to cells within a spreadsheet, but does allow embedding links in cells. When cells contain hyperlinks, `numbers_parser` returns the text version of the cell. The `hyperlinks` property of cells where `is_bulleted` is `True` is a list of text and URL tuples:
|
|
178
|
+
|
|
179
|
+
``` python
|
|
180
|
+
cell = table.cell(0, 0)
|
|
181
|
+
(text, url) = cell.hyperlinks[0]
|
|
182
|
+
```
|
|
183
|
+
|
|
175
184
|
### Cell images
|
|
176
185
|
|
|
177
186
|
Querying cell formats is currently limited to image backgrounds only. If a cell has no background image, `None` is returned for all calls.
|
|
@@ -323,7 +332,7 @@ The default protobuf package installation may not include the C++ optimised vers
|
|
|
323
332
|
|
|
324
333
|
To include the C++ support, download a released version of Google protobuf [from github](https://github.com/protocolbuffers/protobuf). Build instructions are described in [`src/README.md`](https://github.com/protocolbuffers/protobuf/blob/main/src/README). These have changed greatly over time, but as of April 2023, this was useful:
|
|
325
334
|
|
|
326
|
-
```
|
|
335
|
+
``` shell
|
|
327
336
|
bazel build :protoc :protobuf
|
|
328
337
|
cmake . -DCMAKE_CXX_STANDARD=14
|
|
329
338
|
cmake --build . --parallel 8
|
|
@@ -12,7 +12,7 @@ name = "numbers-parser"
|
|
|
12
12
|
packages = [{include = "numbers_parser", from = "src"}]
|
|
13
13
|
readme = "README.md"
|
|
14
14
|
repository = "https://github.com/masaccio/numbers-parser"
|
|
15
|
-
version = "3.9.7"
|
|
15
|
+
version = "3.10.1"
|
|
16
16
|
|
|
17
17
|
[tool.poetry.scripts]
|
|
18
18
|
cat-numbers = "numbers_parser._cat_numbers:main"
|
|
@@ -50,8 +50,7 @@ omit = ["src/numbers_parser/generated/*.py"]
|
|
|
50
50
|
directory = "coverage_html_report"
|
|
51
51
|
|
|
52
52
|
[tool.pytest.ini_options]
|
|
53
|
-
addopts = "
|
|
54
|
-
minversion = 6.0
|
|
53
|
+
addopts = "--cov=src/numbers_parser --cov-report=term-missing"
|
|
55
54
|
|
|
56
55
|
[tool.tox]
|
|
57
56
|
legacy_tox_ini = """
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import csv
|
|
3
|
+
import logging
|
|
3
4
|
import sys
|
|
4
5
|
|
|
5
6
|
from numbers_parser import Document, _get_version
|
|
7
|
+
from numbers_parser import __name__ as numbers_parser_name
|
|
6
8
|
from numbers_parser.exceptions import FileFormatError
|
|
7
9
|
from numbers_parser.cell import ErrorCell
|
|
8
10
|
|
|
11
|
+
logger = logging.getLogger(numbers_parser_name)
|
|
12
|
+
|
|
9
13
|
|
|
10
14
|
def command_line_parser():
|
|
11
15
|
parser = argparse.ArgumentParser(
|
|
@@ -49,6 +53,9 @@ def command_line_parser():
|
|
|
49
53
|
"-t", "--table", action="append", help="Names of table(s) to include in export"
|
|
50
54
|
)
|
|
51
55
|
parser.add_argument("document", nargs="*", help="Document(s) to export")
|
|
56
|
+
parser.add_argument(
|
|
57
|
+
"--debug", default=False, action="store_true", help="Enable debug logging"
|
|
58
|
+
)
|
|
52
59
|
return parser
|
|
53
60
|
|
|
54
61
|
|
|
@@ -100,6 +107,13 @@ def main():
|
|
|
100
107
|
elif len(args.document) == 0:
|
|
101
108
|
parser.print_help()
|
|
102
109
|
else:
|
|
110
|
+
hdlr = logging.StreamHandler()
|
|
111
|
+
hdlr.setFormatter(logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
|
|
112
|
+
logger.addHandler(hdlr)
|
|
113
|
+
if args.debug:
|
|
114
|
+
logger.setLevel("DEBUG")
|
|
115
|
+
else:
|
|
116
|
+
logger.setLevel("ERROR")
|
|
103
117
|
for filename in args.document:
|
|
104
118
|
try:
|
|
105
119
|
if args.list_sheets:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
+
import logging
|
|
3
4
|
import regex
|
|
4
5
|
import sys
|
|
5
6
|
|
|
@@ -9,14 +10,17 @@ from base64 import b64decode
|
|
|
9
10
|
from binascii import hexlify
|
|
10
11
|
from compact_json import Formatter
|
|
11
12
|
|
|
12
|
-
|
|
13
13
|
from numbers_parser.file import read_numbers_file
|
|
14
14
|
from numbers_parser import _get_version
|
|
15
|
+
from numbers_parser import __name__ as numbers_parser_name
|
|
15
16
|
from numbers_parser.iwafile import IWAFile
|
|
16
17
|
from numbers_parser.exceptions import FileFormatError, UnsupportedError, FileError
|
|
17
18
|
from numbers_parser.numbers_uuid import NumbersUUID
|
|
18
19
|
|
|
19
20
|
|
|
21
|
+
logger = logging.getLogger(numbers_parser_name)
|
|
22
|
+
|
|
23
|
+
|
|
20
24
|
def ensure_directory_exists(prefix, path):
|
|
21
25
|
"""Ensure that a path's directory exists."""
|
|
22
26
|
parts = os.path.split(path)
|
|
@@ -113,6 +117,9 @@ def main():
|
|
|
113
117
|
"--pretty", action="store_true", help="Enable all prettifying options"
|
|
114
118
|
)
|
|
115
119
|
parser.add_argument("--output", "-o", help="directory name to unpack into")
|
|
120
|
+
parser.add_argument(
|
|
121
|
+
"--debug", default=False, action="store_true", help="Enable debug logging"
|
|
122
|
+
)
|
|
116
123
|
args = parser.parse_args()
|
|
117
124
|
if args.version:
|
|
118
125
|
print(_get_version())
|
|
@@ -125,6 +132,13 @@ def main():
|
|
|
125
132
|
elif len(args.document) == 0:
|
|
126
133
|
parser.print_help()
|
|
127
134
|
else:
|
|
135
|
+
hdlr = logging.StreamHandler()
|
|
136
|
+
hdlr.setFormatter(logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
|
|
137
|
+
logger.addHandler(hdlr)
|
|
138
|
+
if args.debug:
|
|
139
|
+
logger.setLevel("DEBUG")
|
|
140
|
+
else:
|
|
141
|
+
logger.setLevel("ERROR")
|
|
128
142
|
for document in args.document:
|
|
129
143
|
output_dir = args.output or document.replace(".numbers", "")
|
|
130
144
|
try:
|
|
@@ -6,6 +6,7 @@ from numbers_parser.cell_storage import CellType, CellStorage
|
|
|
6
6
|
|
|
7
7
|
from pendulum import duration, Duration, DateTime
|
|
8
8
|
from functools import lru_cache
|
|
9
|
+
from typing import List, Tuple
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class Cell:
|
|
@@ -48,8 +49,8 @@ class Cell:
|
|
|
48
49
|
cell = DurationCell(*row_col, value)
|
|
49
50
|
elif cell_storage.type == CellType.ERROR:
|
|
50
51
|
cell = ErrorCell(*row_col)
|
|
51
|
-
elif cell_storage.type == CellType.
|
|
52
|
-
cell =
|
|
52
|
+
elif cell_storage.type == CellType.RICH_TEXT:
|
|
53
|
+
cell = RichTextCell(*row_col, cell_storage.value)
|
|
53
54
|
else:
|
|
54
55
|
raise UnsupportedError( # pragma: no cover
|
|
55
56
|
f"Unsupport cell type {cell_storage.type} "
|
|
@@ -140,18 +141,20 @@ class TextCell(Cell):
|
|
|
140
141
|
return self._value
|
|
141
142
|
|
|
142
143
|
|
|
143
|
-
class
|
|
144
|
+
class RichTextCell(Cell):
|
|
144
145
|
def __init__(self, row_num: int, col_num: int, value):
|
|
145
146
|
self._type = TSTArchives.automaticCellType
|
|
146
147
|
super().__init__(row_num, col_num, value["text"])
|
|
147
148
|
self._bullets = value["bullets"]
|
|
148
|
-
self.
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
149
|
+
self._hyperlinks = value["hyperlinks"]
|
|
150
|
+
if value["bulleted"]:
|
|
151
|
+
self._formatted_bullets = [
|
|
152
|
+
value["bullet_chars"][i] + " " + value["bullets"][i]
|
|
153
|
+
if value["bullet_chars"][i] is not None
|
|
154
|
+
else value["bullets"][i]
|
|
155
|
+
for i in range(len(self._bullets))
|
|
156
|
+
]
|
|
157
|
+
self.is_bulleted = True
|
|
155
158
|
|
|
156
159
|
@property
|
|
157
160
|
def value(self) -> str:
|
|
@@ -165,6 +168,15 @@ class BulletedTextCell(Cell):
|
|
|
165
168
|
def formatted_bullets(self) -> str:
|
|
166
169
|
return self._formatted_bullets
|
|
167
170
|
|
|
171
|
+
@property
|
|
172
|
+
def hyperlinks(self) -> List[Tuple]:
|
|
173
|
+
return self._hyperlinks
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Backwards compatibility to earlier class names
|
|
177
|
+
class BulletedTextCell(RichTextCell):
|
|
178
|
+
pass
|
|
179
|
+
|
|
168
180
|
|
|
169
181
|
class EmptyCell(Cell):
|
|
170
182
|
def __init__(self, row_num: int, col_num: int):
|
|
@@ -94,32 +94,6 @@ CELL_STORAGE_MAP_V5 = OrderedDict(
|
|
|
94
94
|
]
|
|
95
95
|
)
|
|
96
96
|
|
|
97
|
-
# CELL_STORAGE_MAP_V4 = OrderedDict(
|
|
98
|
-
# [
|
|
99
|
-
# (0x2, {"attr": "cell_style_id"}),
|
|
100
|
-
# (0x80, {"attr": "text_style_id"}), # SheetJS skips
|
|
101
|
-
# (0x400, {"attr": "conditional_style_id"}), # SheetJS skips
|
|
102
|
-
# (0x800, {"attr": "conditional_style_rule_id"}), # SheetJS skips
|
|
103
|
-
# (0x4, {"attr": "current_format_id"}), # SheetJS skips
|
|
104
|
-
# (0x8, {"attr": "formula_id"}), # SheetJS skips
|
|
105
|
-
# (0x100, {"attr": "formula_error_id"}), # SheetJS skips
|
|
106
|
-
# (0x200, {"attr": "rich_id"}),
|
|
107
|
-
# (0x1000, {"attr": "comment_id"}), # SheetJS skips
|
|
108
|
-
# (0x2000, {"attr": "import_warning_id"}), # SheetJS skips
|
|
109
|
-
# (0x10, {"attr": "string_id"}),
|
|
110
|
-
# (0x20, {"attr": "double", "size": 8}),
|
|
111
|
-
# (0x40, {"attr": "seconds", "size": 8}),
|
|
112
|
-
# (0x10000, {"attr": "num_format_id"}), # SheetJS skips
|
|
113
|
-
# (0x80000, {"attr": "currency_format_id"}), # SheetJS skips
|
|
114
|
-
# (0x20000, {"attr": "date_format_id"}), # SheetJS skips
|
|
115
|
-
# (0x40000, {"attr": "duration_format_id"}), # SheetJS skips
|
|
116
|
-
# (0x100000, {"attr": "control_format_id"}), # SheetJS skips
|
|
117
|
-
# (0x200000, {"attr": "custom_format_id"}), # SheetJS skips
|
|
118
|
-
# (0x400000, {"attr": "base_format_id"}), # SheetJS skips
|
|
119
|
-
# (0x800000, {"attr": "multiple_choice_id"}), # SheetJS skips
|
|
120
|
-
# ]
|
|
121
|
-
# )
|
|
122
|
-
|
|
123
97
|
|
|
124
98
|
class CellStorage:
|
|
125
99
|
def __init__( # noqa: C901
|
|
@@ -178,8 +152,8 @@ class CellStorage:
|
|
|
178
152
|
self.value = None
|
|
179
153
|
self.type = CellType.ERROR
|
|
180
154
|
elif cell_type == TSTArchives.automaticCellType:
|
|
181
|
-
self.value = self.model.
|
|
182
|
-
self.type = CellType.
|
|
155
|
+
self.value = self.model.table_rich_text(self.table_id, self.rich_id)
|
|
156
|
+
self.type = CellType.RICH_TEXT
|
|
183
157
|
elif cell_type == 10:
|
|
184
158
|
self.value = self.d128
|
|
185
159
|
self.type = CellType.NUMBER
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
1
3
|
from io import BytesIO
|
|
2
4
|
from zipfile import ZipFile, BadZipFile
|
|
3
5
|
|
|
@@ -6,8 +8,12 @@ from numbers_parser.exceptions import FileError, FileFormatError
|
|
|
6
8
|
|
|
7
9
|
import os
|
|
8
10
|
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
debug = logger.debug
|
|
13
|
+
|
|
9
14
|
|
|
10
15
|
def read_numbers_file(path, file_handler=None, object_handler=None):
|
|
16
|
+
debug("read_numbers_file: path=%s", path)
|
|
11
17
|
if os.path.isdir(path):
|
|
12
18
|
if os.path.isfile(os.path.join(path, "Index.zip")):
|
|
13
19
|
get_objects_from_zip_file(
|
|
@@ -80,6 +86,7 @@ def extract_iwa_archives(blob, filename, file_handler, object_handler):
|
|
|
80
86
|
return
|
|
81
87
|
|
|
82
88
|
try:
|
|
89
|
+
debug("extract_iwa_archives: filename=%s", filename)
|
|
83
90
|
iwaf = IWAFile.from_buffer(blob, filename)
|
|
84
91
|
except Exception as e: # pragma: no cover
|
|
85
92
|
raise FileFormatError(f"{filename}: invalid IWA file {filename}") from e
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# Forked from https://github.com/psobot/keynote-parser/blob/master/keynote_parser/codec.py
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import struct
|
|
4
5
|
import snappy
|
|
5
6
|
|
|
@@ -15,6 +16,9 @@ from google.protobuf.internal.decoder import _DecodeVarint32
|
|
|
15
16
|
from google.protobuf.json_format import MessageToDict, ParseDict
|
|
16
17
|
from google.protobuf.message import EncodeError
|
|
17
18
|
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
debug = logger.debug
|
|
21
|
+
|
|
18
22
|
|
|
19
23
|
class IWAFile(object):
|
|
20
24
|
def __init__(self, chunks, filename=None):
|
|
@@ -26,6 +30,7 @@ class IWAFile(object):
|
|
|
26
30
|
try:
|
|
27
31
|
chunks = []
|
|
28
32
|
while data:
|
|
33
|
+
debug("from_buffer: filename=%s len=%d", filename, len(data))
|
|
29
34
|
chunk, data = IWACompressedChunk.from_buffer(data, filename)
|
|
30
35
|
chunks.append(chunk)
|
|
31
36
|
|
|
@@ -88,6 +93,7 @@ class IWACompressedChunk(object):
|
|
|
88
93
|
data = b"".join(cls._decompress_all(data))
|
|
89
94
|
archives = []
|
|
90
95
|
while data:
|
|
96
|
+
debug("from_buffer: filename=%s len=%d", filename, len(data))
|
|
91
97
|
archive, data = IWAArchiveSegment.from_buffer(data, filename)
|
|
92
98
|
archives.append(archive)
|
|
93
99
|
return cls(archives), None
|
|
@@ -47,6 +47,7 @@ from numbers_parser.generated import TSPMessages_pb2 as TSPMessages
|
|
|
47
47
|
from numbers_parser.generated import TSPArchiveMessages_pb2 as TSPArchiveMessages
|
|
48
48
|
from numbers_parser.generated import TSTArchives_pb2 as TSTArchives
|
|
49
49
|
from numbers_parser.generated import TSCEArchives_pb2 as TSCEArchives
|
|
50
|
+
from numbers_parser.generated import TSWPArchives_pb2 as TSWPArchives
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
class DataLists:
|
|
@@ -182,9 +183,12 @@ class _NumbersModel:
|
|
|
182
183
|
# },
|
|
183
184
|
row_bucket_map = {i: None for i in range(self.objects[table_id].number_of_rows)}
|
|
184
185
|
bds = self.objects[table_id].base_data_store
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
186
|
+
bucket_ids = [x.identifier for x in bds.rowHeaders.buckets]
|
|
187
|
+
idx = 0
|
|
188
|
+
for bucket_id in bucket_ids:
|
|
189
|
+
for header in self.objects[bucket_id].headers:
|
|
190
|
+
row_bucket_map[header.index] = idx
|
|
191
|
+
idx += 1
|
|
188
192
|
return row_bucket_map
|
|
189
193
|
|
|
190
194
|
def number_of_rows(self, table_id, num_rows=None):
|
|
@@ -1144,10 +1148,9 @@ class _NumbersModel:
|
|
|
1144
1148
|
return cell
|
|
1145
1149
|
|
|
1146
1150
|
@lru_cache(maxsize=None)
|
|
1147
|
-
def
|
|
1151
|
+
def table_rich_text(self, table_id: int, string_key: int) -> Dict:
|
|
1148
1152
|
"""
|
|
1149
|
-
Extract bullets from a rich text data cell.
|
|
1150
|
-
Returns None if the cell is not rich text
|
|
1153
|
+
Extract bullets and hyperlinks from a rich text data cell.
|
|
1151
1154
|
"""
|
|
1152
1155
|
# The table model base data store contains a richTextTable field
|
|
1153
1156
|
# which is a reference to a TST.TableDataList. The TableDataList
|
|
@@ -1186,13 +1189,27 @@ class _NumbersModel:
|
|
|
1186
1189
|
if string_key == entry.key:
|
|
1187
1190
|
payload = self.objects[entry.rich_text_payload.identifier]
|
|
1188
1191
|
payload_storage = self.objects[payload.storage.identifier]
|
|
1189
|
-
|
|
1190
|
-
table_list_styles = payload_storage.table_list_style.entries
|
|
1191
|
-
offsets = [e.character_index for e in payload_entries]
|
|
1192
|
-
|
|
1192
|
+
smartfield_entries = payload_storage.table_smartfield.entries
|
|
1193
1193
|
cell_text = payload_storage.text[0]
|
|
1194
|
+
|
|
1195
|
+
hyperlinks = []
|
|
1196
|
+
for i, e in enumerate(smartfield_entries):
|
|
1197
|
+
if e.object.identifier:
|
|
1198
|
+
obj = self.objects[e.object.identifier]
|
|
1199
|
+
if type(obj) == TSWPArchives.HyperlinkFieldArchive:
|
|
1200
|
+
start = e.character_index
|
|
1201
|
+
if i < len(smartfield_entries) - 1:
|
|
1202
|
+
end = smartfield_entries[i + 1].character_index
|
|
1203
|
+
else:
|
|
1204
|
+
end = len(cell_text)
|
|
1205
|
+
url_text = cell_text[start:end]
|
|
1206
|
+
hyperlinks.append((url_text, obj.url_ref))
|
|
1207
|
+
|
|
1194
1208
|
bullets = []
|
|
1195
1209
|
bullet_chars = []
|
|
1210
|
+
payload_entries = payload_storage.table_para_style.entries
|
|
1211
|
+
table_list_styles = payload_storage.table_list_style.entries
|
|
1212
|
+
offsets = [e.character_index for e in payload_entries]
|
|
1196
1213
|
for i, offset in enumerate(offsets):
|
|
1197
1214
|
if i == len(offsets) - 1:
|
|
1198
1215
|
bullets.append(cell_text[offset:])
|
|
@@ -1211,15 +1228,18 @@ class _NumbersModel:
|
|
|
1211
1228
|
number_type = bullet_style.number_types[0]
|
|
1212
1229
|
bullet_char = formatted_number(number_type, i)
|
|
1213
1230
|
else:
|
|
1214
|
-
bullet_char =
|
|
1231
|
+
bullet_char = None
|
|
1215
1232
|
|
|
1216
1233
|
bullet_chars.append(bullet_char)
|
|
1217
1234
|
|
|
1218
1235
|
return {
|
|
1219
1236
|
"text": cell_text,
|
|
1237
|
+
"bulleted": any([c is not None for c in bullet_chars]),
|
|
1220
1238
|
"bullets": bullets,
|
|
1221
1239
|
"bullet_chars": bullet_chars,
|
|
1240
|
+
"hyperlinks": hyperlinks,
|
|
1222
1241
|
}
|
|
1242
|
+
|
|
1223
1243
|
return None
|
|
1224
1244
|
|
|
1225
1245
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNArchives_sos_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TNCommandArchives_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSAArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSAArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCEArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCH3DArchives_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_GEN_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSCHArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSDArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSDArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSKArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSKArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSPMessages_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSSArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSSArchives_sos_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSTArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSWPArchives_pb2.py
RENAMED
|
File without changes
|
{numbers_parser-3.9.7 → numbers_parser-3.10.1}/src/numbers_parser/generated/TSWPArchives_sos_pb2.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|