numbers-parser 4.14.4__py3-none-any.whl → 4.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,850 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections import defaultdict
5
+ from contextlib import suppress
6
+ from dataclasses import dataclass, field, replace
7
+ from enum import IntEnum
8
+
9
+ from numbers_parser.constants import OPERATOR_PRECEDENCE
10
+
11
+ __all__ = ["xl_cell_to_rowcol", "xl_col_to_name", "xl_range", "xl_rowcol_to_cell"]
12
+
13
+
14
class TableAxis(IntEnum):
    """Select the table axis (rows or columns) that a name lookup applies to."""

    # Row headers.
    ROW = 1
    # Column headers.
    COLUMN = 2
19
+
20
+
21
class RefScope(IntEnum):
    """
    The required scope for a name reference in a document.

    Each member is described by the ``#:`` comment directly above it, so the
    description cannot be attributed to a neighbouring member.
    """

    #: Name is unique to the document.
    DOCUMENT = 1
    #: Name is unique to a sheet.
    SHEET = 2
    #: Name is unique to a table and that table is uniquely named.
    TABLE = 3
    #: All other names.
    NONE = 4
32
+
33
+
34
@dataclass
class ScopedNameRef:
    """
    A header name plus its position and the scope in which it is unique.

    Only ``name`` is required; every other field defaults to "unknown"
    (``None``) or to the weakest scope, ``RefScope.NONE``.
    """

    # The header text used as the reference name.
    name: str
    # Index of the row or column carrying the name.
    offset: int | None = None
    # Whether ``offset`` counts rows or columns.
    axis: TableAxis | None = None
    # ID of the table that owns the row or column.
    table_id: int | None = None
    # How widely the name is unique.
    scope: RefScope = RefScope.NONE
41
+
42
+
43
class CellRangeType(IntEnum):
    """Tags for the kind of reference stored in ``CellRange.range_type``."""

    ROW_RANGE = 1
    COL_RANGE = 2
    RANGE = 3
    NAMED_RANGE = 4
    CELL = 5
    NAMED_ROW_COLUMN = 6
50
+
51
+
52
@dataclass
class CellRange:
    """
    A reference to a cell, a row, a column or a rectangular range of a table.

    ``from_table_id`` is the table the reference is written in and
    ``to_table_id`` the table it points at; when ``to_table_id`` is omitted it
    defaults to ``from_table_id``. ``str()`` renders the reference as text,
    adding ``table::`` / ``sheet::table::`` prefixes only where
    :meth:`expand_ref` decides they are needed.
    """

    # Document model; must provide name_ref_cache, table_names(), table_name(),
    # sheet_name() and table_id_to_sheet_id().
    model: object = None
    # Zero-based start/end offsets. A value of None for both row fields means a
    # column-only reference, and vice versa.
    row_start: int | str | None = None
    row_end: int | None = None
    col_start: int | str | None = None
    col_end: int | None = None
    # Absolute ("$") flags for each corner component.
    row_start_is_abs: bool = False
    row_end_is_abs: bool = False
    col_start_is_abs: bool = False
    col_end_is_abs: bool = False
    from_table_id: int | None = None
    to_table_id: int | None = None
    range_type: CellRangeType | None = None
    # Snapshot of all table names taken in __post_init__.
    _table_names: list[str] | None = field(init=False, default=None, repr=False)

    def __post_init__(self):
        """Snapshot table names, refresh the name cache and resolve sheet IDs."""
        if self._table_names is None:
            self._initialize_table_data()
        self.model.name_ref_cache.refresh()
        self._set_sheet_ids()

    def _initialize_table_data(self):
        """Record all table names and whether each one is unique in the document."""
        names = self.model.table_names()
        self._table_names = names
        self.table_name_unique = {name: names.count(name) == 1 for name in names}

    def _set_sheet_ids(self):
        """Determine the sheet IDs for the referenced tables."""
        if self.to_table_id is None:
            # A reference without an explicit target points into its own table.
            self.to_table_id = self.from_table_id
        sheet_for_table = self.model.table_id_to_sheet_id
        self.from_sheet_id = sheet_for_table(self.from_table_id)
        self.to_sheet_id = sheet_for_table(self.to_table_id)

    def expand_ref(self, ref: str | ScopedNameRef, is_abs: bool = False, no_prefix=False) -> str:
        """
        Render one reference component, qualified with table/sheet names if needed.

        Parameters
        ----------
        ref: str | ScopedNameRef
            Either literal reference text (such as ``"A1"`` or ``"3"``) or a
            scoped header name.
        is_abs: bool
            If ``True``, prefix the reference text with ``$``.
        no_prefix: bool
            If ``True``, never add a ``table::`` or ``sheet::table::`` prefix.

        Returns
        -------
        str:
            The reference text, quoted when it contains an operator character
            and prefixed only as far as its scope requires.
        """
        self.model.name_ref_cache.refresh()

        # Plain strings carry no scope information.
        if isinstance(ref, ScopedNameRef):
            scope, text = ref.scope, ref.name
        else:
            scope, text = None, ref

        ref_str = f"${text}" if is_abs else text
        if any(op in ref_str for op in OPERATOR_PRECEDENCE):
            ref_str = f"'{ref_str}'"
        elif "'" in ref_str:
            ref_str = ref_str.replace("'", "'''")

        if no_prefix or scope == RefScope.DOCUMENT:
            return ref_str
        if self.from_table_id == self.to_table_id:
            return ref_str

        table_name = self.model.table_name(self.to_table_id)
        same_sheet = self.from_sheet_id == self.to_sheet_id
        if same_sheet and scope == RefScope.SHEET:
            # If absolute Numbers seems to unnecessarily include the table name
            return f"{table_name}::{ref_str}" if is_abs else ref_str

        table_is_unique = self.table_name_unique[table_name]
        if same_sheet or scope == RefScope.TABLE or table_is_unique:
            return f"{table_name}::{ref_str}"

        sheet_name = self.model.sheet_name(self.to_sheet_id)
        return f"{sheet_name}::{table_name}::{ref_str}"

    def __str__(self):
        """Render the reference as a row range, column range or cell range."""
        cache = self.model.name_ref_cache
        cache.refresh()

        if self.col_start is None:
            # Row-only reference.
            named_rows = cache.row_ranges[self.to_table_id]
            return self._format_row_range(self.row_start, self.row_end, named_rows)

        if self.row_start is None:
            # Column-only reference.
            named_cols = cache.col_ranges[self.to_table_id]
            return self._format_col_range(self.col_start, self.col_end, named_cols)

        return self._format_cell_range(self.row_start, self.col_start, self.row_end, self.col_end)

    def _format_row_range(self, row_start, row_end, row_range):
        """Formats a row-only range."""
        if row_end is not None:
            return self._format_row_span(row_start, row_end, row_range)
        return self._format_single_row(row_start, row_range)

    def _format_single_row(self, row_start, row_range):
        """Formats a single row, either numeric or named."""
        named = row_range[row_start]
        if named is not None:
            return self.expand_ref(named, self.row_start_is_abs)
        return self._format_numeric_row(row_start)

    def _format_numeric_row(self, row_start):
        """Formats a single numeric row as ``N:N`` using its one-based number."""
        label = str(row_start + 1)
        head = self.expand_ref(label, self.row_start_is_abs)
        tail = self.expand_ref(label, self.row_start_is_abs, no_prefix=True)
        return f"{head}:{tail}"

    def _format_row_span(self, row_start, row_end, row_range):
        """Formats a range of rows."""
        first = row_range[row_start]
        if first is None:
            head = self.expand_ref(str(row_start + 1), self.row_start_is_abs)
            tail = self.expand_ref(str(row_end + 1), self.row_end_is_abs, no_prefix=True)
            return f"{head}:{tail}"

        last = row_range[row_end]
        # A document-unique name at either end makes the prefix unnecessary.
        drop_prefix = first.scope == RefScope.DOCUMENT or last.scope == RefScope.DOCUMENT
        head = self.expand_ref(first, self.row_start_is_abs, no_prefix=drop_prefix)
        tail = self.expand_ref(last, self.row_end_is_abs, no_prefix=True)
        return f"{head}:{tail}"

    def _format_col_range(self, col_start, col_end, col_range):
        """Formats a column-only range."""
        if col_end is not None:
            return self._format_column_span(col_start, col_end, col_range)
        return self._format_single_column(col_start, col_range)

    def _format_single_column(self, col_start, col_range):
        """Formats a single column, either numeric or named."""
        named = col_range[col_start]
        if named is not None:
            return self.expand_ref(named, self.col_start_is_abs)
        return self.expand_ref(xl_col_to_name(col_start, col_abs=self.col_start_is_abs))

    def _format_column_span(self, col_start, col_end, col_range):
        """Formats a range of columns."""
        first = col_range[col_start]
        if first is None:
            head = self.expand_ref(xl_col_to_name(col_start, col_abs=self.col_start_is_abs))
            tail = self.expand_ref(
                xl_col_to_name(col_end, col_abs=self.col_end_is_abs),
                no_prefix=True,
            )
            return f"{head}:{tail}"

        last = col_range[col_end]
        # A document-unique name at either end makes the prefix unnecessary.
        drop_prefix = first.scope == RefScope.DOCUMENT or last.scope == RefScope.DOCUMENT
        head = self.expand_ref(first, self.col_start_is_abs, no_prefix=drop_prefix)
        tail = self.expand_ref(last, self.col_end_is_abs, no_prefix=True)
        return f"{head}:{tail}"

    def _format_cell_range(self, row_start, col_start, row_end, col_end):
        """Formats a full cell range, or a single cell when no end corner is set."""
        first_cell = xl_rowcol_to_cell(
            row_start,
            col_start,
            row_abs=self.row_start_is_abs,
            col_abs=self.col_start_is_abs,
        )
        head = self.expand_ref(first_cell)
        if row_end is None or col_end is None:
            return head

        last_cell = xl_rowcol_to_cell(
            row_end,
            col_end,
            row_abs=self.row_end_is_abs,
            col_abs=self.col_end_is_abs,
        )
        tail = self.expand_ref(last_cell, no_prefix=True)
        return f"{head}:{tail}"
239
+
240
+
241
class ScopedNameRefCache:
    """
    Cache of row/column header names that can be used as named references.

    For every table the cache records, per row and per column, the header
    name (when it is unique within that table) and the widest scope in which
    that name is unique: document, sheet or table. The cache is rebuilt
    lazily by :meth:`refresh` after :meth:`mark_dirty` has been called.
    """

    def __init__(self, model):
        """Create an empty, dirty cache bound to ``model``."""
        self.model = model
        # name -> ScopedNameRef for names that are unique in the document.
        self.doc_name_refs = {}
        # sheet ID -> {name -> ScopedNameRef} for names unique in that sheet.
        self.sheet_name_refs = {}
        # table ID -> {name -> ScopedNameRef} for all remaining names.
        self.table_name_refs = {}
        # table ID -> {row/column index -> ScopedNameRef | None}.
        self.row_ranges = {}
        self.col_ranges = {}
        self._dirty_cache = True
        self.table_names = []

    def mark_dirty(self):
        """Flag the cache as stale so the next :meth:`refresh` rebuilds it."""
        self._dirty_cache = True

    def refresh(self):
        """Rebuild the cache if it has been marked dirty; otherwise do nothing."""
        if self._dirty_cache:
            self.calculate_named_ranges()
            self._dirty_cache = False

    @staticmethod
    def _exact_count(pool: list, value: int | str | bool):
        """Count items in ``pool`` equal to ``value`` AND of exactly the same type."""
        # The type check keeps True/1/1.0 from being treated as the same name.
        return sum(1 for x in pool if type(x) is type(value) and x == value)

    def _row_data(self, table_id: int, row: int) -> int | str | bool | None:
        """Return the formatted value of ``row``'s cell in the last header column."""
        num_header_cols = self.model.num_header_cols(table_id)
        return self.model._table_data[table_id][row][num_header_cols - 1].formatted_value

    def _column_data(self, table_id: int, col: int) -> int | str | bool | None:
        """Return the formatted value of ``col``'s cell in the last header row."""
        num_header_rows = self.model.num_header_rows(table_id)
        return self.model._table_data[table_id][num_header_rows - 1][col].formatted_value

    def _calculate_name_scopes(
        self,
        sheet_id: int,
        table_id: int,
        axis: TableAxis,
    ) -> dict[int, int | str | bool | None]:
        """
        Collect the header names along one axis of a table.

        Returns a dict mapping every row (or column) index to its header name,
        or to ``None`` when the index is inside the header area, the header
        cell is empty, or the name occurs more than once along the axis. As a
        side effect each recorded name is counted in ``doc_name_refs`` and in
        ``sheet_name_refs[sheet_id]``; both must be counting dicts at this
        point (see :meth:`calculate_named_ranges`).
        """
        num_header_rows = self.model.num_header_rows(table_id)
        num_header_cols = self.model.num_header_cols(table_id)

        if axis == TableAxis.ROW:
            data_lookup = self._row_data
            range_start = num_header_rows
            range_end = self.model.number_of_rows(table_id)
            first_offset = num_header_rows
        else:
            data_lookup = self._column_data
            range_start = num_header_cols
            range_end = self.model.number_of_columns(table_id)
            first_offset = num_header_cols

        # Rows are named by header columns and columns by header rows; without
        # the relevant headers nothing on this axis has a name.
        if (axis == TableAxis.ROW and num_header_cols == 0) or (
            axis == TableAxis.COLUMN and num_header_rows == 0
        ):
            return {idx: None for idx in range(range_end)}

        scopes = {}
        names = []
        all_names = [data_lookup(table_id, idx) for idx in range(range_start, range_end)]
        for idx in range(range_end):
            if idx < first_offset:
                names.append(None)
                scopes[idx] = None
            else:
                name = data_lookup(table_id, idx)
                if name is None:
                    scopes[idx] = None
                elif self._exact_count(all_names, name) > 1:
                    # Duplicated along this axis: unusable as a reference.
                    names.append(None)
                    scopes[idx] = None
                else:
                    names.append(name)
                    scopes[idx] = name

        for name in names:
            self.doc_name_refs[name] += 1
            self.sheet_name_refs[sheet_id][name] += 1

        return scopes

    def _calculate_scope_types(
        self,
        sheet_id: int,
        table_id: int,
        axis: TableAxis,
        scopes: dict,
    ):
        """
        For any locally unique row/column names, tag whether they are table-unique,
        sheet-unique or document-unique names.

        ``scopes`` is updated in place: each raw name is replaced by a
        ``ScopedNameRef``, which is also stored in the matching
        document/sheet/table lookup dict.
        """
        if axis == TableAxis.ROW:
            axis_range = range(self.model.number_of_rows(table_id))
        else:
            axis_range = range(self.model.number_of_columns(table_id))

        table_name = self.model.table_name(table_id)
        for idx in axis_range:
            name = scopes[idx]
            if name is None:
                continue
            scope = ScopedNameRef(name, axis=axis, table_id=table_id, offset=idx)
            if name in self.doc_name_refs:
                scopes[idx] = replace(scope, scope=RefScope.DOCUMENT)
                self.doc_name_refs[name] = scopes[idx]
            elif name in self.sheet_name_refs[sheet_id]:
                scopes[idx] = replace(scope, scope=RefScope.SHEET)
                self.sheet_name_refs[sheet_id][name] = scopes[idx]
            else:
                # A table-scoped name is only addressable from anywhere in the
                # document when the table name itself is unique.
                scope_type = (
                    RefScope.TABLE
                    if self._exact_count(self.table_names, table_name) == 1
                    else RefScope.NONE
                )
                scopes[idx] = replace(scope, scope=scope_type)
                self.table_name_refs[table_id][name] = scopes[idx]

    def _calculate_table_name_maps(self) -> None:
        """Build the sheet-name and table-name lookup dicts used when dereferencing."""
        self.sheet_name_to_id = {self.model.sheet_name(sid): sid for sid in self.model.sheet_ids()}
        self.sheet_id_to_name = {sid: self.model.sheet_name(sid) for sid in self.model.sheet_ids()}
        # Only tables whose name occurs exactly once in the document.
        self.unique_table_name_to_id = {
            self.model.table_name(tid): tid
            for tid in self.model.table_ids()
            if self.table_names.count(self.model.table_name(tid)) == 1
        }
        # sheet name -> {table name -> table ID}.
        self.sheet_table_name_to_id = {
            self.model.sheet_name(sid): {
                self.model.table_name(tid): tid for tid in self.model.table_ids(sid)
            }
            for sid in self.model.sheet_ids()
        }

    def _scoped_ref_to_cell_ref(self, ref: ScopedNameRef) -> CellRange:
        """Convert a ScopedNameRef into a CellRange."""
        return CellRange(
            model=self.model,
            to_table_id=ref.table_id,
            row_start=ref.offset if ref.axis == TableAxis.ROW else None,
            col_start=ref.offset if ref.axis == TableAxis.COLUMN else None,
        )

    @staticmethod
    def _name_in_cell_range(name: str, cell_range: dict) -> int:
        """
        Look ``name`` up among the ScopedNameRef values of a dict.

        Returns the ``offset`` of the first value whose name matches, or -1
        when no value matches. Values that are not ScopedNameRefs are ignored.
        """
        match = [
            cell.offset
            for cell in cell_range.values()
            if isinstance(cell, ScopedNameRef) and cell.name == name
        ]
        if len(match) == 0:
            return -1
        return match[0]

    @staticmethod
    def _offsets_for_name(ranges: dict, name: str) -> list:
        """Return the offsets of all non-empty entries of ``ranges`` named ``name``."""
        return [ref.offset for ref in ranges.values() if ref is not None and ref.name == name]

    @staticmethod
    def _unquote(name: str) -> str:
        """Collapse doubled single quotes and strip one pair of enclosing quotes."""
        name = name.replace("''", "'")
        if name.startswith("'") and name.endswith("'"):
            name = name[1:-1]
        return name

    def _deref_doc_scope(self, from_table_id: int, name: str) -> CellRange:
        """
        Try and use a name reference in the document scope or current sheet.

        Raises ``ValueError`` when the name is found in neither scope.
        """
        name = self._unquote(name)

        # Try using the name as document scope
        if self._name_in_cell_range(name, self.doc_name_refs) >= 0:
            return self._scoped_ref_to_cell_ref(self.doc_name_refs[name])

        # Next, try the current sheet scope
        from_sheet_id = self.model.table_id_to_sheet_id(from_table_id)
        if self._name_in_cell_range(name, self.sheet_name_refs[from_sheet_id]) >= 0:
            return self._scoped_ref_to_cell_ref(self.sheet_name_refs[from_sheet_id][name])

        msg = f"'{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def _deref_single_scope(self, from_table_id: int, name_scope: str, name: str) -> CellRange:
        """
        Resolve a name using a single scope. The scope could be one of:
        - A sheet name
        - A table within the current sheet
        - A unique table name anywhere in the document

        Raises ``ValueError`` when the name cannot be resolved.
        """
        name = self._unquote(name)

        from_sheet_id = self.model.table_id_to_sheet_id(from_table_id)
        # Set only when ``name_scope`` turns out to name a table (steps 3, 4).
        to_table_id = None

        # 1. Try resolving the name treating the scope as a sheet name
        if name_scope in self.sheet_name_to_id:
            to_sheet_id = self.sheet_name_to_id[name_scope]
            sheet_refs = self.sheet_name_refs[to_sheet_id]
            if name in sheet_refs:
                scoped = sheet_refs[name]
                if scoped is None:
                    # A sheet-unique name that is also document-unique is
                    # recorded only in the document scope, leaving None here.
                    scoped = self.doc_name_refs.get(name)
                if isinstance(scoped, ScopedNameRef):
                    return self._scoped_ref_to_cell_ref(scoped)

        # 2. Try resolving as a name in the current sheet
        from_sheet_name = self.sheet_id_to_name[from_sheet_id]
        if self._name_in_cell_range(name, self.sheet_name_refs[from_sheet_id]) >= 0:
            return self._scoped_ref_to_cell_ref(self.sheet_name_refs[from_sheet_id][name])

        # 3. Try resolving as a name in a unique table in the document
        if name_scope in self.unique_table_name_to_id:
            to_table_id = self.unique_table_name_to_id[name_scope]
            if self._name_in_cell_range(name, self.table_name_refs[to_table_id]) >= 0:
                # Name is valid in table scope and table name is document-unique
                return self._scoped_ref_to_cell_ref(self.table_name_refs[to_table_id][name])

        # 4. Try resolving as a name in a table in the current sheet
        if name_scope in self.sheet_table_name_to_id[from_sheet_name]:
            to_table_id = self.sheet_table_name_to_id[from_sheet_name][name_scope]
            # Name is valid in a table in the current sheet
            if (offset := self._name_in_cell_range(name, self.row_ranges[to_table_id])) >= 0:
                return CellRange(
                    model=self.model,
                    to_table_id=to_table_id,
                    row_start=offset,
                )
            if (offset := self._name_in_cell_range(name, self.col_ranges[to_table_id])) >= 0:
                return CellRange(
                    model=self.model,
                    to_table_id=to_table_id,
                    col_start=offset,
                )

        # 5. Try resolving the name as a row or column reference. This only
        # makes sense when the scope named a table; otherwise there is no
        # table to point at and the lookup fails with ValueError below.
        if to_table_id is not None:
            with suppress(IndexError):
                col_start = xl_col_to_offset(name)
                # NOTE(review): offset 0 (column "A") is falsy and so is not
                # resolved here -- confirm whether that is intended.
                if col_start:
                    return CellRange(model=self.model, to_table_id=to_table_id, col_start=col_start)

        msg = f"'{name_scope}::{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def _deref_name(
        self,
        from_table_id: int,
        name_scope_1: str,
        name_scope_2: str,
        name: str,
    ) -> CellRange:
        """
        Resolve ``name`` given zero, one or two scope qualifiers.

        With no qualifiers the document/current-sheet scope is used; with only
        ``name_scope_2`` the single-scope rules apply; with both, they are
        treated as ``sheet::table``. Raises ``ValueError`` when unresolved.
        """
        if not name_scope_1 and not name_scope_2:
            return self._deref_doc_scope(from_table_id, name)

        if not name_scope_1:
            return self._deref_single_scope(from_table_id, name_scope_2, name)

        # Full sheet::table::name scope
        try:
            to_table_id = self.sheet_table_name_to_id[name_scope_1][name_scope_2]
            if (offset := self._name_in_cell_range(name, self.row_ranges[to_table_id])) >= 0:
                return CellRange(
                    model=self.model,
                    to_table_id=to_table_id,
                    row_start=offset,
                )
            if (offset := self._name_in_cell_range(name, self.col_ranges[to_table_id])) >= 0:
                return CellRange(
                    model=self.model,
                    to_table_id=to_table_id,
                    col_start=offset,
                )
        except KeyError:
            # Catch invalid sheet/table names and fall through
            pass

        msg = f"'{name_scope_1}::{name_scope_2}::{name}' does not exist or scope is ambiguous"
        raise ValueError(msg)

    def lookup_named_ref(self, from_table_id: int, ref: CellRange) -> CellRange:
        """
        Resolve a named reference, or a ``start:end`` pair of names, to offsets.

        ``ref`` carries the names in ``row_start`` / ``row_end`` (or
        ``col_start``) and the optional qualifiers in ``name_scope_1`` and
        ``name_scope_2``. Returns a ``CellRange`` holding numeric offsets.
        Raises ``ValueError`` when a name cannot be resolved.
        """

        def range_error_message(ref: CellRange):
            msg = f"{ref.name_scope_1}::" if ref.name_scope_1 else ""
            msg += f"{ref.name_scope_2}::" if ref.name_scope_2 else ""
            msg += ref.row_start
            msg += f":{ref.row_end}" if ref.row_end else ""
            return f"'{msg}' does not exist or scope is ambiguous"

        self.model.name_ref_cache.refresh()
        if ref.row_start and ref.row_end:
            # Numbers will use the reduced scope of one part of a range to scope the other
            # so start:end in a document scope will resolve if either of the references can
            # be resolved in that scope.
            start_ref, end_ref = None, None
            with suppress(ValueError):
                start_ref = self._deref_name(
                    from_table_id,
                    ref.name_scope_1,
                    ref.name_scope_2,
                    ref.row_start,
                )
            with suppress(ValueError):
                end_ref = self._deref_name(
                    from_table_id,
                    ref.name_scope_1,
                    ref.name_scope_2,
                    ref.row_end,
                )

            if start_ref is None and end_ref is None:
                raise ValueError(range_error_message(ref))

            if start_ref is None:
                # Look the start name up in the table the end name resolved to.
                table_id = end_ref.to_table_id
                row_start = self._offsets_for_name(self.row_ranges[table_id], ref.row_start)
                col_start = self._offsets_for_name(self.col_ranges[table_id], ref.row_start)
                if not row_start and not col_start:
                    raise ValueError(range_error_message(ref))

                start_ref = CellRange(
                    model=self.model,
                    to_table_id=table_id,
                    row_start=row_start[0] if row_start else None,
                    col_start=col_start[0] if col_start else None,
                )
            elif end_ref is None:
                # Look the end name up in the table the start name resolved to.
                table_id = start_ref.to_table_id
                row_end = self._offsets_for_name(self.row_ranges[table_id], ref.row_end)
                col_end = self._offsets_for_name(self.col_ranges[table_id], ref.row_end)
                if not row_end and not col_end:
                    raise ValueError(range_error_message(ref))
                end_ref = CellRange(
                    model=self.model,
                    to_table_id=table_id,
                    row_start=row_end[0] if row_end else None,
                    col_start=col_end[0] if col_end else None,
                )
            elif start_ref.row_start is None:
                # Both ends resolved and the start is a column: column span.
                return CellRange(
                    model=self.model,
                    to_table_id=start_ref.to_table_id,
                    col_start=start_ref.col_start,
                    col_end=end_ref.col_start,
                )
            elif start_ref.col_start is None:
                # Both ends resolved and the start is a row: row span.
                return CellRange(
                    model=self.model,
                    to_table_id=start_ref.to_table_id,
                    row_start=start_ref.row_start,
                    row_end=end_ref.row_start,
                )
            start_ref.row_end = end_ref.row_start
            start_ref.col_end = end_ref.col_start
            return start_ref

        if ref.row_start:
            return self._deref_name(
                from_table_id,
                ref.name_scope_1,
                ref.name_scope_2,
                ref.row_start,
            )
        return self._deref_name(
            from_table_id,
            ref.name_scope_1,
            ref.name_scope_2,
            ref.col_start,
        )

    def calculate_named_ranges(self):
        """
        Find the globally unique row and column headers and the table unique
        row and column headers for use in range references.

        Nothing is returned; the results are stored on the cache in
        ``row_ranges``, ``col_ranges``, ``doc_name_refs``, ``sheet_name_refs``
        and ``table_name_refs``.
        """
        # Pass 1: collect names per table while counting occurrences per
        # document and per sheet.
        self.doc_name_refs = defaultdict(int)
        self.sheet_name_refs = {}
        # Reset alongside the other maps so entries for tables that no longer
        # exist do not linger between recalculations.
        self.table_name_refs = {}
        self.table_names = self.model.table_names()
        self._calculate_table_name_maps()

        self.row_ranges = {}
        self.col_ranges = {}
        for sheet_id in self.model.sheet_ids():
            self.sheet_name_refs[sheet_id] = defaultdict(int)
            for table_id in self.model.table_ids(sheet_id):
                self.table_name_refs[table_id] = defaultdict(int)
                self.row_ranges[table_id] = self._calculate_name_scopes(
                    sheet_id,
                    table_id,
                    TableAxis.ROW,
                )
                self.col_ranges[table_id] = self._calculate_name_scopes(
                    sheet_id,
                    table_id,
                    TableAxis.COLUMN,
                )

        # Re-init the list of document-scoped names if they are unique
        self.doc_name_refs = {
            name: None for name, count in self.doc_name_refs.items() if count == 1
        }

        # Pass 2: replace raw names with ScopedNameRefs tagged with their scope.
        for sheet_id in self.model.sheet_ids():
            # Re-init the list of sheet-scoped names if they are unique
            self.sheet_name_refs[sheet_id] = {
                name: None for name, count in self.sheet_name_refs[sheet_id].items() if count == 1
            }

            for table_id in self.model.table_ids(sheet_id):
                self._calculate_scope_types(
                    sheet_id,
                    table_id,
                    TableAxis.ROW,
                    self.row_ranges[table_id],
                )
                self._calculate_scope_types(
                    sheet_id,
                    table_id,
                    TableAxis.COLUMN,
                    self.col_ranges[table_id],
                )
660
+
661
+
662
+ # Cell reference conversion from https://github.com/jmcnamara/XlsxWriter
663
+ # Copyright (c) 2013-2021, John McNamara <jmcnamara@cpan.org>
664
# A1-style cell reference: optional "$", one to three capital column letters,
# optional "$", then the row digits. Groups: ($, letters, $, digits).
range_parts = re.compile(r"(\$?)([A-Z]{1,3})(\$?)(\d+)")

# A1-style column reference: optional "$" followed by one to three capital
# column letters. Groups: ($, letters).
col_parts = re.compile(r"(\$?)([A-Z]{1,3})")
667
+
668
+
669
def xl_col_to_offset(col_str: str) -> int:
    """
    Translate an A1-notation column reference into a zero-based column index.

    Parameters
    ----------
    col_str: str
        Column reference such as ``"A"`` or ``"$AB"``. An empty value yields 0.

    Returns
    -------
    col: int
        Zero-based column index.

    Raises
    ------
    IndexError:
        If ``col_str`` does not start with a column reference.

    """
    if not col_str:
        return 0

    parsed = col_parts.match(col_str)
    if parsed is None:
        msg = f"invalid cell reference {col_str}"
        raise IndexError(msg)

    # Letters form a base-26 number with digits A=1 .. Z=26; accumulate from
    # the most significant letter.
    one_based = 0
    for letter in parsed.group(2):
        one_based = one_based * 26 + (ord(letter) - ord("A") + 1)

    return one_based - 1
703
+
704
+
705
def xl_cell_to_rowcol(cell_str: str) -> tuple:
    """
    Translate an A1-notation cell reference into zero-based (row, column).

    Parameters
    ----------
    cell_str: str
        Cell reference such as ``"B3"`` or ``"$B$3"``. An empty value yields
        ``(0, 0)``.

    Returns
    -------
    row, col: int, int
        Zero-based row and column indices.

    Raises
    ------
    IndexError:
        If ``cell_str`` does not start with a cell reference.

    """
    if not cell_str:
        return 0, 0

    parsed = range_parts.match(cell_str)
    if parsed is None:
        msg = f"invalid cell reference {cell_str}"
        raise IndexError(msg)

    letters, digits = parsed.group(2), parsed.group(4)

    # Letters form a base-26 number with digits A=1 .. Z=26; accumulate from
    # the most significant letter.
    one_based_col = 0
    for letter in letters:
        one_based_col = one_based_col * 26 + (ord(letter) - ord("A") + 1)

    # Both results are shifted from one-based to zero-based.
    return int(digits) - 1, one_based_col - 1
741
+
742
+
743
def xl_range(first_row, first_col, last_row, last_col):
    """
    Build an ``A1:B1`` style range string from zero-based corner coordinates.

    Parameters
    ----------
    first_row: int
        Row of the first corner.
    first_col: int
        Column of the first corner.
    last_row: int
        Row of the last corner.
    last_col: int
        Column of the last corner.

    Returns
    -------
    str:
        ``"A1:B1"`` style text, or a single cell reference when both corners
        are the same cell.

    """
    first_cell = xl_rowcol_to_cell(first_row, first_col)
    last_cell = xl_rowcol_to_cell(last_row, last_col)
    return first_cell if first_cell == last_cell else first_cell + ":" + last_cell
770
+
771
+
772
def xl_rowcol_to_cell(row, col, row_abs=False, col_abs=False):
    """
    Build an A1-style cell reference from zero-based row and column indices.

    Parameters
    ----------
    row: int
        Zero-based row index.
    col: int
        Zero-based column index.
    row_abs: bool
        If ``True``, mark the row absolute with ``$``.
    col_abs: bool
        If ``True``, mark the column absolute with ``$``.

    Returns
    -------
    str:
        A1 style string.

    Raises
    ------
    IndexError:
        If ``row`` or ``col`` is negative.

    """
    if row < 0:
        msg = f"row reference {row} below zero"
        raise IndexError(msg)

    if col < 0:
        msg = f"column reference {col} below zero"
        raise IndexError(msg)

    row_marker = "$" if row_abs else ""
    column_part = xl_col_to_name(col, col_abs)

    # Rows are shown one-based.
    return column_part + row_marker + str(row + 1)
807
+
808
+
809
def xl_col_to_name(col, col_abs=False):
    """
    Convert a zero indexed column cell reference to a string.

    Parameters
    ----------
    col: int
        The column number (zero indexed).
    col_abs: bool, default: False
        If ``True``, make the column absolute.

    Returns
    -------
    str:
        Column in A1 notation.

    Raises
    ------
    IndexError:
        If ``col`` is negative.

    """
    if col < 0:
        msg = f"column reference {col} below zero"
        raise IndexError(msg)

    prefix = "$" if col_abs else ""

    remaining = col + 1  # Change to 1-index.
    letters = ""
    while remaining:
        # Column letters are base 26 with digits 1..26 (no zero digit), so
        # shift by one before splitting off the least significant letter.
        # divmod keeps the arithmetic in exact integers; float division would
        # lose precision for very large column numbers.
        remaining, digit = divmod(remaining - 1, 26)

        # Accumulate the column letters, right to left.
        letters = chr(ord("A") + digit) + letters

    return prefix + letters