numbers-parser 4.14.4__py3-none-any.whl → 4.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numbers_parser/__init__.py +1 -0
- numbers_parser/_unpack_numbers.py +1 -7
- numbers_parser/cell.py +28 -202
- numbers_parser/constants.py +19 -6
- numbers_parser/document.py +54 -11
- numbers_parser/formula.py +423 -56
- numbers_parser/generated/TSCEArchives_pb2.py +209 -193
- numbers_parser/generated/TSSArchives_pb2.py +36 -36
- numbers_parser/generated/TSTArchives_pb2.py +328 -332
- numbers_parser/generated/mapping.py +1 -2
- numbers_parser/model.py +399 -188
- numbers_parser/numbers_cache.py +1 -1
- numbers_parser/numbers_uuid.py +6 -0
- numbers_parser/tokenizer.py +548 -0
- numbers_parser/xrefs.py +850 -0
- {numbers_parser-4.14.4.dist-info → numbers_parser-4.16.1.dist-info}/METADATA +26 -28
- {numbers_parser-4.14.4.dist-info → numbers_parser-4.16.1.dist-info}/RECORD +24 -22
- {numbers_parser-4.14.4.dist-info → numbers_parser-4.16.1.dist-info}/WHEEL +2 -1
- numbers_parser-4.16.1.dist-info/entry_points.txt +4 -0
- numbers_parser-4.16.1.dist-info/top_level.txt +1 -0
- numbers_parser/data/empty.numbers +0 -0
- numbers_parser-4.14.4.dist-info/entry_points.txt +0 -5
- {numbers_parser-4.14.4.dist-info → numbers_parser-4.16.1.dist-info/licenses}/LICENSE.rst +0 -0
numbers_parser/xrefs.py
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from contextlib import suppress
|
|
6
|
+
from dataclasses import dataclass, field, replace
|
|
7
|
+
from enum import IntEnum
|
|
8
|
+
|
|
9
|
+
from numbers_parser.constants import OPERATOR_PRECEDENCE
|
|
10
|
+
|
|
11
|
+
__all__ = ["xl_cell_to_rowcol", "xl_col_to_name", "xl_range", "xl_rowcol_to_cell"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TableAxis(IntEnum):
    """Indicates whether a cache is for rows or columns."""

    # Row axis: used when header names are read per row.
    ROW = 1
    # Column axis: used when header names are read per column.
    COLUMN = 2
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RefScope(IntEnum):
    """The required scope for a name reference in a document."""

    # Name is unique to the document.
    DOCUMENT = 1
    # Name is unique to a sheet.
    SHEET = 2
    # Name is unique to a table and that table is uniquely named.
    TABLE = 3
    # All other names.
    NONE = 4
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ScopedNameRef:
    """A row or column header name plus its location and uniqueness scope."""

    # Header text used as the reference name.
    name: str
    # Index of the row/column along ``axis``; ScopedNameRefCache fills this
    # from a ``range()`` over the table's rows or columns.
    offset: int | None = None
    # Whether the name labels a row or a column.
    axis: TableAxis | None = None
    # ID of the table that contains the header.
    table_id: int | None = None
    # Widest scope in which ``name`` is unique; defaults to "not unique".
    scope: RefScope = RefScope.NONE
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CellRangeType(IntEnum):
    """
    Kinds of reference, used as the type of ``CellRange.range_type``.

    NOTE(review): the member meanings below are inferred from their names;
    nothing in this module reads them -- confirm against the callers.
    """

    # presumably a span of whole rows
    ROW_RANGE = 1
    # presumably a span of whole columns
    COL_RANGE = 2
    # presumably a rectangular cell range
    RANGE = 3
    # presumably a range whose endpoints are header names
    NAMED_RANGE = 4
    # presumably a single cell
    CELL = 5
    # presumably a single row or column identified by header name
    NAMED_ROW_COLUMN = 6
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class CellRange:
    """
    A reference to a cell, a row or column, or a range, renderable as text.

    ``str(cell_range)`` produces the reference. The amount of ``table::`` or
    ``sheet::table::`` qualification depends on the table the reference is
    written from (``from_table_id``), the table it points at
    (``to_table_id``) and the scope of any header names involved.

    Instances need a ``model``: ``__post_init__`` reads the table names from
    it, refreshes ``model.name_ref_cache`` and looks up the sheet IDs of both
    tables.
    """

    # Document model; must provide table_names(), table_name(), sheet_name(),
    # table_id_to_sheet_id() and a ``name_ref_cache`` attribute.
    model: object = None
    # Start/end offsets. The start fields may also hold a header name (str)
    # before the reference has been resolved by ScopedNameRefCache.
    row_start: int | str = None
    row_end: int = None
    col_start: int | str = None
    col_end: int = None
    # Whether each component is rendered as absolute (``$`` prefix).
    row_start_is_abs: bool = False
    row_end_is_abs: bool = False
    col_start_is_abs: bool = False
    col_end_is_abs: bool = False
    # Table containing the reference and table being referenced. When
    # ``to_table_id`` is omitted it defaults to ``from_table_id``.
    from_table_id: int = None
    to_table_id: int = None
    range_type: CellRangeType = None
    _table_names: list[str] = field(init=False, default=None, repr=False)

    def __post_init__(self):
        """Load table names, refresh the name cache and resolve sheet IDs."""
        if self._table_names is None:
            self._initialize_table_data()
        self.model.name_ref_cache.refresh()
        self._set_sheet_ids()

    def _initialize_table_data(self):
        """Record all table names and whether each one occurs exactly once."""
        self._table_names = self.model.table_names()
        self.table_name_unique = {
            name: self._table_names.count(name) == 1 for name in self._table_names
        }

    def _set_sheet_ids(self):
        """Determine the sheet IDs for the referenced tables."""
        if self.to_table_id is None:
            self.to_table_id = self.from_table_id
        self.from_sheet_id = self.model.table_id_to_sheet_id(self.from_table_id)
        self.to_sheet_id = self.model.table_id_to_sheet_id(self.to_table_id)

    def expand_ref(
        self,
        ref: str | ScopedNameRef,
        is_abs: bool = False,
        no_prefix=False,
    ) -> str:
        """
        Render one component of a reference with the qualification it needs.

        Parameters
        ----------
        ref: str | ScopedNameRef
            Either literal reference text (e.g. a cell or column in A1
            notation) or a scoped header name.
        is_abs: bool
            If ``True``, prefix the text with ``$``.
        no_prefix: bool
            If ``True``, never add a table or sheet prefix.

        Returns
        -------
        str:
            The reference text, optionally prefixed with ``table::`` or
            ``sheet::table::``.
        """
        self.model.name_ref_cache.refresh()

        # Only scoped names carry uniqueness information; plain strings are
        # treated as having no special scope.
        if isinstance(ref, ScopedNameRef):
            scope = ref.scope
            ref_str = f"${ref.name}" if is_abs else ref.name
        else:
            scope = None
            ref_str = f"${ref}" if is_abs else ref

        # Text containing an operator character is wrapped in quotes;
        # otherwise any apostrophes are tripled.
        if any(x in ref_str for x in OPERATOR_PRECEDENCE):
            ref_str = f"'{ref_str}'"
        elif "'" in ref_str:
            ref_str = ref_str.replace("'", "'''")

        if no_prefix or scope == RefScope.DOCUMENT:
            return ref_str

        # References within the same table never need qualifying.
        if self.from_table_id == self.to_table_id:
            return ref_str

        table_name = self.model.table_name(self.to_table_id)
        same_sheet = self.from_sheet_id == self.to_sheet_id
        if same_sheet and scope == RefScope.SHEET:
            # If absolute Numbers seems to unnecessarily include the table name
            return f"{table_name}::{ref_str}" if is_abs else ref_str

        # The table name alone is enough when the target is on the same
        # sheet, or when the name or the table name is document-unique.
        is_table_unique = scope == RefScope.TABLE
        is_table_unique |= self.table_name_unique[table_name]
        if same_sheet or is_table_unique:
            return f"{table_name}::{ref_str}"

        sheet_name = self.model.sheet_name(self.to_sheet_id)
        return f"{sheet_name}::{table_name}::{ref_str}"

    def __str__(self):
        """Render the reference as row-only, column-only or cell/range text."""
        self.model.name_ref_cache.refresh()
        # Handle row-only ranges
        if self.col_start is None:
            row_range = self.model.name_ref_cache.row_ranges[self.to_table_id]
            return self._format_row_range(self.row_start, self.row_end, row_range)

        # Handle column-only ranges
        if self.row_start is None:
            col_range = self.model.name_ref_cache.col_ranges[self.to_table_id]
            return self._format_col_range(self.col_start, self.col_end, col_range)

        # Handle full cell ranges
        return self._format_cell_range(self.row_start, self.col_start, self.row_end, self.col_end)

    def _format_row_range(self, row_start, row_end, row_range):
        """Formats a row-only range."""
        if row_end is None:
            return self._format_single_row(row_start, row_range)
        return self._format_row_span(row_start, row_end, row_range)

    def _format_single_row(self, row_start, row_range):
        """Formats a single row, either numeric or named."""
        named = row_range[row_start]
        if named is None:
            return self._format_numeric_row(row_start)
        return self.expand_ref(named, self.row_start_is_abs)

    def _format_numeric_row(self, row_start):
        """Formats a single numeric row as ``N:N`` (one-indexed)."""
        row_text = str(row_start + 1)
        return ":".join(
            [
                self.expand_ref(row_text, self.row_start_is_abs),
                self.expand_ref(row_text, self.row_start_is_abs, no_prefix=True),
            ],
        )

    def _format_row_span(self, row_start, row_end, row_range):
        """
        Formats a range of rows.

        Header names are used only when both ends have one; if either end is
        unnamed the span is written with one-indexed row numbers.
        """
        # Checking the end as well avoids dereferencing ``None.scope`` when
        # only the start row carries a usable header name.
        if row_range[row_start] is None or row_range[row_end] is None:
            return ":".join(
                [
                    self.expand_ref(str(row_start + 1), self.row_start_is_abs),
                    self.expand_ref(str(row_end + 1), self.row_end_is_abs, no_prefix=True),
                ],
            )

        start_name = row_range[row_start]
        end_name = row_range[row_end]
        # A document-unique name on either end makes the prefix unnecessary.
        either_doc_scoped = (
            start_name.scope == RefScope.DOCUMENT or end_name.scope == RefScope.DOCUMENT
        )
        return ":".join(
            [
                self.expand_ref(start_name, self.row_start_is_abs, no_prefix=either_doc_scoped),
                self.expand_ref(end_name, self.row_end_is_abs, no_prefix=True),
            ],
        )

    def _format_col_range(self, col_start, col_end, col_range):
        """Formats a column-only range."""
        if col_end is None:
            return self._format_single_column(col_start, col_range)
        return self._format_column_span(col_start, col_end, col_range)

    def _format_single_column(self, col_start, col_range):
        """Formats a single column, either as column letters or named."""
        named = col_range[col_start]
        if named is None:
            return self.expand_ref(xl_col_to_name(col_start, col_abs=self.col_start_is_abs))
        return self.expand_ref(named, self.col_start_is_abs)

    def _format_column_span(self, col_start, col_end, col_range):
        """
        Formats a range of columns.

        Header names are used only when both ends have one; if either end is
        unnamed the span is written with column letters.
        """
        # Checking the end as well avoids dereferencing ``None.scope`` when
        # only the start column carries a usable header name.
        if col_range[col_start] is None or col_range[col_end] is None:
            return ":".join(
                [
                    self.expand_ref(xl_col_to_name(col_start, col_abs=self.col_start_is_abs)),
                    self.expand_ref(
                        xl_col_to_name(col_end, col_abs=self.col_end_is_abs),
                        no_prefix=True,
                    ),
                ],
            )

        start_name = col_range[col_start]
        end_name = col_range[col_end]
        # A document-unique name on either end makes the prefix unnecessary.
        either_doc_scoped = (
            start_name.scope == RefScope.DOCUMENT or end_name.scope == RefScope.DOCUMENT
        )
        return ":".join(
            [
                self.expand_ref(start_name, self.col_start_is_abs, no_prefix=either_doc_scoped),
                self.expand_ref(end_name, self.col_end_is_abs, no_prefix=True),
            ],
        )

    def _format_cell_range(self, row_start, col_start, row_end, col_end):
        """Formats a single cell, or a full cell range when both ends are set."""
        start_cell = xl_rowcol_to_cell(
            row_start,
            col_start,
            row_abs=self.row_start_is_abs,
            col_abs=self.col_start_is_abs,
        )
        if row_end is None or col_end is None:
            return self.expand_ref(start_cell)

        end_cell = xl_rowcol_to_cell(
            row_end,
            col_end,
            row_abs=self.row_end_is_abs,
            col_abs=self.col_end_is_abs,
        )
        # Only the first half of a range carries the table/sheet prefix.
        return ":".join(
            [
                self.expand_ref(start_cell),
                self.expand_ref(end_cell, no_prefix=True),
            ],
        )
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class ScopedNameRefCache:
    """
    Cache of row/column header names and the scope in which each is unique.

    The cache is rebuilt lazily: ``mark_dirty()`` flags it as stale and
    ``refresh()`` recalculates it on the next use. After a refresh:

    - ``row_ranges`` / ``col_ranges`` map table ID -> {offset: ScopedNameRef
      or None}, where ``None`` means the row/column has no usable name;
    - ``doc_name_refs``, ``sheet_name_refs[sheet_id]`` and
      ``table_name_refs[table_id]`` map a name to its ScopedNameRef in the
      widest scope where that name is unique.
    """

    def __init__(self, model):
        self.model = model
        self.doc_name_refs = {}
        self.sheet_name_refs = {}
        self.table_name_refs = {}
        self.row_ranges = {}
        self.col_ranges = {}
        self._dirty_cache = True
        self.table_names = []

    def mark_dirty(self):
        """Flag the cache as stale so the next ``refresh()`` rebuilds it."""
        self._dirty_cache = True

    def refresh(self):
        """Rebuild the cache if it has been marked dirty."""
        if self._dirty_cache:
            self.calculate_named_ranges()
            self._dirty_cache = False

    @staticmethod
    def _exact_count(pool: list, value: int | str | bool):
        """Count items equal to ``value`` AND of exactly the same type."""
        # The type check keeps e.g. ``1`` and ``True`` from counting as equal.
        return sum(1 for x in pool if type(x) is type(value) and x == value)

    @staticmethod
    def _unquote(name: str) -> str:
        """Collapse doubled apostrophes, then drop one pair of enclosing quotes."""
        name = name.replace("''", "'")
        if name.startswith("'") and name.endswith("'"):
            name = name[1:-1]
        return name

    def _row_data(self, table_id: int, row: int) -> int | str | bool | None:
        """Return the formatted value of ``row``'s cell in the last header column."""
        num_header_cols = self.model.num_header_cols(table_id)
        return self.model._table_data[table_id][row][num_header_cols - 1].formatted_value

    def _column_data(self, table_id: int, col: int) -> int | str | bool | None:
        """Return the formatted value of ``col``'s cell in the last header row."""
        num_header_rows = self.model.num_header_rows(table_id)
        return self.model._table_data[table_id][num_header_rows - 1][col].formatted_value

    def _calculate_name_scopes(
        self,
        sheet_id: int,
        table_id: int,
        axis: TableAxis,
    ) -> dict[int, str | None]:
        """
        Collect the header names along one axis of a table.

        Returns a dict mapping every row/column offset to its header name, or
        to ``None`` when the offset is itself a header, has no name, or shares
        its name with another row/column of the same table. As a side effect
        the document-wide and per-sheet occurrence counters are updated.
        """
        num_header_rows = self.model.num_header_rows(table_id)
        num_header_cols = self.model.num_header_cols(table_id)

        if axis == TableAxis.ROW:
            data_lookup = self._row_data
            first_offset = num_header_rows
            range_end = self.model.number_of_rows(table_id)
        else:
            data_lookup = self._column_data
            first_offset = num_header_cols
            range_end = self.model.number_of_columns(table_id)

        # Rows are named by header columns and columns by header rows, so
        # without the opposite header nothing on this axis has a name.
        if (axis == TableAxis.ROW and num_header_cols == 0) or (
            axis == TableAxis.COLUMN and num_header_rows == 0
        ):
            return {idx: None for idx in range(range_end)}

        scopes = {}
        names = []
        # Read each header cell once; all_names[i] belongs to offset
        # first_offset + i.
        all_names = [data_lookup(table_id, idx) for idx in range(first_offset, range_end)]
        for idx in range(range_end):
            if idx < first_offset:
                names.append(None)
                scopes[idx] = None
                continue
            name = all_names[idx - first_offset]
            if name is None:
                scopes[idx] = None
            elif self._exact_count(all_names, name) > 1:
                # Ambiguous within this table: unusable as a reference.
                names.append(None)
                scopes[idx] = None
            else:
                names.append(name)
                scopes[idx] = name

        for name in names:
            self.doc_name_refs[name] += 1
            self.sheet_name_refs[sheet_id][name] += 1

        return scopes

    def _calculate_scope_types(
        self,
        sheet_id: int,
        table_id: int,
        axis: TableAxis,
        scopes: dict[int, str | ScopedNameRef | None],
    ) -> None:
        """
        For any locally unique row/column names, tag whether they are table-unique,
        sheet-unique or document-unique names.

        ``scopes`` is updated in place: each name is replaced by a
        ScopedNameRef, which is also registered in the matching
        document/sheet/table lookup.
        """
        if axis == TableAxis.ROW:
            axis_range = range(self.model.number_of_rows(table_id))
        else:
            axis_range = range(self.model.number_of_columns(table_id))

        table_name = self.model.table_name(table_id)
        for idx in axis_range:
            name = scopes[idx]
            if name is None:
                continue
            scope = ScopedNameRef(name, axis=axis, table_id=table_id, offset=idx)
            if name in self.doc_name_refs:
                scopes[idx] = replace(scope, scope=RefScope.DOCUMENT)
                self.doc_name_refs[name] = scopes[idx]
            elif name in self.sheet_name_refs[sheet_id]:
                scopes[idx] = replace(scope, scope=RefScope.SHEET)
                self.sheet_name_refs[sheet_id][name] = scopes[idx]
            else:
                # A table-scoped name is only usable document-wide when the
                # table's own name is unique.
                scope_type = (
                    RefScope.TABLE
                    if self._exact_count(self.table_names, table_name) == 1
                    else RefScope.NONE
                )
                scopes[idx] = replace(scope, scope=scope_type)
                self.table_name_refs[table_id][name] = scopes[idx]

    def _calculate_table_name_maps(self) -> None:
        """Build the sheet-name and table-name lookup dictionaries."""
        sheet_ids = self.model.sheet_ids()
        self.sheet_name_to_id = {self.model.sheet_name(sid): sid for sid in sheet_ids}
        self.sheet_id_to_name = {sid: self.model.sheet_name(sid) for sid in sheet_ids}
        # Only tables whose name occurs once in the document are listed here.
        self.unique_table_name_to_id = {
            self.model.table_name(tid): tid
            for tid in self.model.table_ids()
            if self.table_names.count(self.model.table_name(tid)) == 1
        }
        self.sheet_table_name_to_id = {
            self.model.sheet_name(sid): {
                self.model.table_name(tid): tid for tid in self.model.table_ids(sid)
            }
            for sid in sheet_ids
        }

    def _scoped_ref_to_cell_ref(self, ref: ScopedNameRef) -> CellRange:
        """Convert a ScopedNameRef into a CellRange."""
        return CellRange(
            model=self.model,
            to_table_id=ref.table_id,
            row_start=ref.offset if ref.axis == TableAxis.ROW else None,
            col_start=ref.offset if ref.axis == TableAxis.COLUMN else None,
        )

    @staticmethod
    def _name_in_cell_range(name: str, cell_range: dict) -> int:
        """
        Check whether the given name is found among the values of a mapping.

        Only values that are ScopedNameRefs are considered. Returns the offset
        of the first match, or -1 when there is none.
        """
        for cell in cell_range.values():
            if isinstance(cell, ScopedNameRef) and cell.name == name:
                return cell.offset
        return -1

    def _find_in_table(self, to_table_id: int, name: str) -> CellRange | None:
        """Look ``name`` up among a table's row names, then its column names."""
        if (offset := self._name_in_cell_range(name, self.row_ranges[to_table_id])) >= 0:
            return CellRange(
                model=self.model,
                to_table_id=to_table_id,
                row_start=offset,
            )
        if (offset := self._name_in_cell_range(name, self.col_ranges[to_table_id])) >= 0:
            return CellRange(
                model=self.model,
                to_table_id=to_table_id,
                col_start=offset,
            )
        return None

    def _deref_doc_scope(self, from_table_id: int, name: str) -> CellRange:
        """
        Try and use a name reference in the document scope or current sheet.

        Raises ``ValueError`` if the name is found in neither.
        """
        name = self._unquote(name)

        # Try using the name as document scope
        if self._name_in_cell_range(name, self.doc_name_refs) >= 0:
            return self._scoped_ref_to_cell_ref(self.doc_name_refs[name])

        # Next, try the current sheet scope
        from_sheet_id = self.model.table_id_to_sheet_id(from_table_id)
        if self._name_in_cell_range(name, self.sheet_name_refs[from_sheet_id]) >= 0:
            return self._scoped_ref_to_cell_ref(self.sheet_name_refs[from_sheet_id][name])

        msg = f"'{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def _deref_single_scope(self, from_table_id: int, name_scope: str, name: str) -> CellRange:
        """
        Resolve a name using a single scope. The scope could be one of:
        - A sheet name
        - A table within the current sheet
        - A unique table name anywhere in the document

        Raises ``ValueError`` if the name cannot be resolved.
        """
        name = self._unquote(name)

        from_sheet_id = self.model.table_id_to_sheet_id(from_table_id)
        # Stays None unless name_scope turns out to name a table (steps 3/4).
        to_table_id = None

        # 1. Try resolving the name treating the scope as a sheet name
        if name_scope in self.sheet_name_to_id:
            to_sheet_id = self.sheet_name_to_id[name_scope]
            if name in self.sheet_name_refs[to_sheet_id]:
                return self._scoped_ref_to_cell_ref(self.sheet_name_refs[to_sheet_id][name])

        # 2. Try resolving as a name in the current sheet
        from_sheet_name = self.sheet_id_to_name[from_sheet_id]
        if self._name_in_cell_range(name, self.sheet_name_refs[from_sheet_id]) >= 0:
            return self._scoped_ref_to_cell_ref(self.sheet_name_refs[from_sheet_id][name])

        # 3. Try resolving as a name in a unique table in the document
        if name_scope in self.unique_table_name_to_id:
            to_table_id = self.unique_table_name_to_id[name_scope]
            if self._name_in_cell_range(name, self.table_name_refs[to_table_id]) >= 0:
                # Name is valid in table scope and table name is document-unique
                return self._scoped_ref_to_cell_ref(self.table_name_refs[to_table_id][name])

        # 4. Try resolving as a name in a table in the current sheet
        if name_scope in self.sheet_table_name_to_id[from_sheet_name]:
            to_table_id = self.sheet_table_name_to_id[from_sheet_name][name_scope]
            # Name is valid in a table in the current sheet
            if (found := self._find_in_table(to_table_id, name)) is not None:
                return found

        # 5. Try resolving the name as a column reference in the table found
        #    above. Skipped when name_scope matched no table: there is then
        #    nothing to attach the column to, and the ValueError below (which
        #    callers suppress) is the right outcome.
        if to_table_id is not None:
            with suppress(IndexError):
                col_start = xl_col_to_offset(name)
                # NOTE(review): offset 0 (column "A") is rejected by this
                # truthiness test, and xl_col_to_offset only anchors at the
                # start of the text -- confirm both are intended.
                if col_start:
                    return CellRange(model=self.model, to_table_id=to_table_id, col_start=col_start)

        msg = f"'{name_scope}::{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def _deref_name(
        self,
        from_table_id: int,
        name_scope_1: str,
        name_scope_2: str,
        name: str,
    ) -> CellRange:
        """
        Resolve ``name`` given zero, one or two scope qualifiers.

        With no qualifier the document/current-sheet scopes are searched; with
        only ``name_scope_2`` it is tried as a sheet or table name; with both,
        they are treated as ``sheet::table``. Raises ``ValueError`` when the
        name cannot be resolved.
        """
        if not name_scope_1 and not name_scope_2:
            return self._deref_doc_scope(from_table_id, name)

        if not name_scope_1:
            return self._deref_single_scope(from_table_id, name_scope_2, name)

        # Full sheet::table::name scope
        try:
            to_table_id = self.sheet_table_name_to_id[name_scope_1][name_scope_2]
            if (found := self._find_in_table(to_table_id, name)) is not None:
                return found
        except KeyError:
            # Catch invalid sheet/table names and fall through
            pass

        msg = f"'{name_scope_1}::{name_scope_2}::{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def lookup_named_ref(self, from_table_id: int, ref: CellRange) -> CellRange:
        """
        Resolve a reference expressed with header names into offsets.

        ``ref`` carries the names in ``row_start`` / ``row_end`` (or
        ``col_start``) and the optional qualifiers in ``name_scope_1`` and
        ``name_scope_2``. Returns a new CellRange pointing at the resolved
        table with numeric offsets. Raises ``ValueError`` when the names
        cannot be resolved.
        """

        def range_error_message(ref: CellRange):
            msg = f"{ref.name_scope_1}::" if ref.name_scope_1 else ""
            msg += f"{ref.name_scope_2}::" if ref.name_scope_2 else ""
            msg += ref.row_start
            msg += f":{ref.row_end}" if ref.row_end else ""
            return f"'{msg}' does not exist or scope is ambiguous"

        def offsets_named(ranges: dict, name: str) -> list:
            # Offsets of every named row/column in ``ranges`` called ``name``.
            return [v.offset for v in ranges.values() if v is not None and v.name == name]

        self.model.name_ref_cache.refresh()

        if not (ref.row_start and ref.row_end):
            # Single name: stored in row_start if present, else in col_start.
            single = ref.row_start if ref.row_start else ref.col_start
            return self._deref_name(
                from_table_id,
                ref.name_scope_1,
                ref.name_scope_2,
                single,
            )

        # Numbers will use the reduced scope of one part of a range to scope the other
        # so start:end in a document scope will resolve if either of the references can
        # be resolved in that scope.
        start_ref, end_ref = None, None
        with suppress(ValueError):
            start_ref = self._deref_name(
                from_table_id,
                ref.name_scope_1,
                ref.name_scope_2,
                ref.row_start,
            )
        with suppress(ValueError):
            end_ref = self._deref_name(
                from_table_id,
                ref.name_scope_1,
                ref.name_scope_2,
                ref.row_end,
            )

        if start_ref is None and end_ref is None:
            raise ValueError(range_error_message(ref))

        if start_ref is None:
            # Only the end resolved: look the start name up in the same table.
            table_id = end_ref.to_table_id
            row_start = offsets_named(self.row_ranges[table_id], ref.row_start)
            col_start = offsets_named(self.col_ranges[table_id], ref.row_start)
            if len(row_start) == 0 and len(col_start) == 0:
                raise ValueError(range_error_message(ref))

            start_ref = CellRange(
                model=self.model,
                to_table_id=table_id,
                row_start=row_start[0] if row_start else None,
                col_start=col_start[0] if col_start else None,
            )
        elif end_ref is None:
            # Only the start resolved: look the end name up in the same table.
            table_id = start_ref.to_table_id
            row_end = offsets_named(self.row_ranges[table_id], ref.row_end)
            col_end = offsets_named(self.col_ranges[table_id], ref.row_end)
            if len(row_end) == 0 and len(col_end) == 0:
                raise ValueError(range_error_message(ref))
            end_ref = CellRange(
                model=self.model,
                to_table_id=table_id,
                row_start=row_end[0] if row_end else None,
                col_start=col_end[0] if col_end else None,
            )
        elif start_ref.row_start is None:
            # Both resolved and the start is a column: column span.
            return CellRange(
                model=self.model,
                to_table_id=start_ref.to_table_id,
                col_start=start_ref.col_start,
                col_end=end_ref.col_start,
            )
        elif start_ref.col_start is None:
            # Both resolved and the start is a row: row span.
            return CellRange(
                model=self.model,
                to_table_id=start_ref.to_table_id,
                row_start=start_ref.row_start,
                row_end=end_ref.row_start,
            )

        # Merge the end reference's position into the start reference.
        start_ref.row_end = end_ref.row_start
        start_ref.col_end = end_ref.col_start
        return start_ref

    def calculate_named_ranges(self):
        """
        Find the globally unique row and column headers and the table unique
        row and column headers for use in range references.

        Nothing is returned; the results are stored on the instance in
        ``row_ranges``, ``col_ranges``, ``doc_name_refs``, ``sheet_name_refs``
        and ``table_name_refs``.
        """
        # Pass 1 uses these as occurrence counters.
        self.doc_name_refs = defaultdict(int)
        self.sheet_name_refs = {}
        # Reset like the other maps so tables that no longer exist drop out.
        self.table_name_refs = {}
        self.table_names = self.model.table_names()
        self._calculate_table_name_maps()

        self.row_ranges = {}
        self.col_ranges = {}
        for sheet_id in self.model.sheet_ids():
            self.sheet_name_refs[sheet_id] = defaultdict(int)
            for table_id in self.model.table_ids(sheet_id):
                self.table_name_refs[table_id] = defaultdict(int)
                self.row_ranges[table_id] = self._calculate_name_scopes(
                    sheet_id,
                    table_id,
                    TableAxis.ROW,
                )
                self.col_ranges[table_id] = self._calculate_name_scopes(
                    sheet_id,
                    table_id,
                    TableAxis.COLUMN,
                )

        # Re-init the list of document-scoped names if they are unique
        self.doc_name_refs = {
            name: None for name, count in self.doc_name_refs.items() if count == 1
        }

        # Pass 2 replaces the placeholders with ScopedNameRefs.
        for sheet_id in self.model.sheet_ids():
            # Re-init the list of sheet-scoped names if they are unique
            self.sheet_name_refs[sheet_id] = {
                name: None for name, count in self.sheet_name_refs[sheet_id].items() if count == 1
            }

            for table_id in self.model.table_ids(sheet_id):
                self._calculate_scope_types(
                    sheet_id,
                    table_id,
                    TableAxis.ROW,
                    self.row_ranges[table_id],
                )
                self._calculate_scope_types(
                    sheet_id,
                    table_id,
                    TableAxis.COLUMN,
                    self.col_ranges[table_id],
                )
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
# Cell reference conversion from https://github.com/jmcnamara/XlsxWriter
# Copyright (c) 2013-2021, John McNamara <jmcnamara@cpan.org>

# A1-style cell reference: optional "$", one to three uppercase column
# letters, optional "$", then the row digits.
range_parts = re.compile(r"(\$?)([A-Z]{1,3})(\$?)(\d+)")

# A1-style column reference: optional "$", then one to three uppercase
# column letters.
col_parts = re.compile(r"(\$?)([A-Z]{1,3})")
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def xl_col_to_offset(col_str: str) -> int:
    """
    Convert a column reference in A1 notation to a zero indexed column.

    Parameters
    ----------
    col_str: str
        A1 notation column reference. An empty value yields 0.

    Returns
    -------
    col: int
        Column numbers (zero indexed).

    Raises
    ------
    IndexError:
        If the text does not start with a column reference.

    """
    if not col_str:
        return 0

    parsed = col_parts.match(col_str)
    if not parsed:
        msg = f"invalid cell reference {col_str}"
        raise IndexError(msg)

    # Accumulate the bijective base-26 letters left to right (A=1 .. Z=26).
    one_based = 0
    for letter in parsed.group(2):
        one_based = one_based * 26 + (ord(letter) - ord("A") + 1)

    # Convert 1-index to zero-index
    return one_based - 1
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def xl_cell_to_rowcol(cell_str: str) -> tuple:
    """
    Convert a cell reference in A1 notation to a zero indexed row and column.

    Parameters
    ----------
    cell_str: str
        A1 notation cell reference. An empty value yields ``(0, 0)``.

    Returns
    -------
    row, col: int, int
        Cell row and column numbers (zero indexed).

    Raises
    ------
    IndexError:
        If the text does not start with a cell reference.

    """
    if not cell_str:
        return 0, 0

    parsed = range_parts.match(cell_str)
    if not parsed:
        msg = f"invalid cell reference {cell_str}"
        raise IndexError(msg)

    letters, digits = parsed.group(2), parsed.group(4)

    # Accumulate the bijective base-26 letters left to right (A=1 .. Z=26).
    one_based_col = 0
    for letter in letters:
        one_based_col = one_based_col * 26 + (ord(letter) - ord("A") + 1)

    # Convert 1-index to zero-index
    return int(digits) - 1, one_based_col - 1
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def xl_range(first_row, first_col, last_row, last_col):
    """
    Convert zero indexed row and col cell references to a A1:B1 range string.

    Parameters
    ----------
    first_row: int
        The first cell row.
    first_col: int
        The first cell column.
    last_row: int
        The last cell row.
    last_col: int
        The last cell column.

    Returns
    -------
    str:
        A1:B1 style range string, or a single cell reference when both
        corners are the same cell.

    """
    top_left = xl_rowcol_to_cell(first_row, first_col)
    bottom_right = xl_rowcol_to_cell(last_row, last_col)

    # A one-cell range collapses to that cell's reference.
    if top_left != bottom_right:
        return top_left + ":" + bottom_right
    return top_left
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def xl_rowcol_to_cell(row, col, row_abs=False, col_abs=False):
    """
    Convert a zero indexed row and column cell reference to a A1 style string.

    Parameters
    ----------
    row: int
        The cell row.
    col: int
        The cell column.
    row_abs: bool
        If ``True``, make the row absolute.
    col_abs: bool
        If ``True``, make the column absolute.

    Returns
    -------
    str:
        A1 style string.

    Raises
    ------
    IndexError:
        If ``row`` or ``col`` is negative.

    """
    for label, value in (("row", row), ("column", col)):
        if value < 0:
            msg = f"{label} reference {value} below zero"
            raise IndexError(msg)

    row_marker = "$" if row_abs else ""
    column_part = xl_col_to_name(col, col_abs)

    # Rows are displayed one-indexed.
    return f"{column_part}{row_marker}{row + 1}"
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def xl_col_to_name(col, col_abs=False):
    """
    Convert a zero indexed column cell reference to a string.

    Parameters
    ----------
    col: int
        The column number (zero indexed).
    col_abs: bool, default: False
        If ``True``, make the column absolute.

    Returns
    -------
    str:
        Column in A1 notation.

    Raises
    ------
    IndexError:
        If ``col`` is negative.

    """
    if col < 0:
        msg = f"column reference {col} below zero"
        raise IndexError(msg)

    col += 1  # Change to 1-index.
    letters = []
    while col:
        # Bijective base 26: shifting by one maps 1..26 onto digits 0..25.
        # Integer divmod keeps the arithmetic exact for any size of int;
        # float division would lose precision above 2**53.
        col, digit = divmod(col - 1, 26)
        letters.append(chr(ord("A") + digit))

    # Letters were produced least-significant first.
    prefix = "$" if col_abs else ""
    return prefix + "".join(reversed(letters))
|