offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1266 @@
1
+ from __future__ import annotations
2
+
3
+ from copy import copy
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ import re
7
+
8
+ from offagent.domain.locators import parse_locator, to_v2_locator
9
+ from offagent.domain.models import (
10
+ BlockStyle,
11
+ DocumentRef,
12
+ InlineFragment,
13
+ InlineStyle,
14
+ IndexedItem,
15
+ SectionPayload,
16
+ SheetCell,
17
+ SheetSnapshot,
18
+ StructureSection,
19
+ TextContainerSnapshot,
20
+ VisibleTextRange,
21
+ WorkbookStructure,
22
+ WorksheetSummary,
23
+ XlsxSectionCell,
24
+ XlsxRowEmbedding,
25
+ XlsxRowEmbeddingCell,
26
+ )
27
+ from offagent.domain.text_fragments import (
28
+ apply_style_to_range,
29
+ fragment_text,
30
+ normalize_fragments,
31
+ )
32
+ from offagent.errors import InvalidArgumentsError, TargetNotEditableError
33
+ from offagent.errors import TargetNotFoundError
34
+
35
+ try:
36
+ from openpyxl import Workbook, load_workbook
37
+ from openpyxl.cell.rich_text import CellRichText, TextBlock
38
+ from openpyxl.cell.text import InlineFont
39
+ from openpyxl.styles import Alignment, Font, PatternFill
40
+ from openpyxl.utils.cell import (
41
+ coordinate_to_tuple,
42
+ get_column_letter,
43
+ range_boundaries,
44
+ )
45
+ except ModuleNotFoundError: # pragma: no cover - exercised through dependency checks
46
+ Workbook = None
47
+ CellRichText = None
48
+ Alignment = None
49
+ Font = None
50
+ InlineFont = None
51
+ PatternFill = None
52
+ TextBlock = None
53
+ load_workbook = None
54
+ coordinate_to_tuple = None
55
+ get_column_letter = None
56
+ range_boundaries = None
57
+
58
+
59
@dataclass(frozen=True)
class ResolvedCell:
    """Immutable description of a single worksheet cell.

    Instances are produced by ``resolve_cell`` from an openpyxl cell.
    """

    # Title of the worksheet that owns the cell.
    sheet_name: str
    # Coordinate of the cell as reported by openpyxl.
    coordinate: str
    # The cell value exactly as loaded from the workbook.
    raw_value: object
    # Formula text when the cell is a formula cell, otherwise None.
    formula: str | None
    # Text form of the value (the formula text for formula cells).
    display_text: str
66
+
67
+
68
class TargetNotAppendableError(TargetNotEditableError):
    """Signals that text cannot be appended to the requested XLSX target.

    Raised by ``append_cell`` for formula cells and for cells whose current
    value is neither empty nor a string.
    """
70
+
71
+
72
# Matches text that looks like a formatted number: optional surrounding
# whitespace, an optional leading "(", "+" or "-", an optional currency
# symbol, digits (plain or comma-grouped in threes), an optional decimal
# part, an optional "%" and an optional closing ")".
NUMERIC_TEXT_PATTERN = re.compile(
    r"""
    ^\s*
    [\(\+\-]?
    [$€£]?
    (?:
    \d{1,3}(?:,\d{3})+ |
    \d+
    )
    (?:\.\d+)?
    %?
    \)?
    \s*$
    """,
    flags=re.VERBOSE,
)
88
+
89
+
90
def extract_document(document_path: Path) -> list[IndexedItem]:
    """Build one ``IndexedItem`` per indexable cell across all worksheets.

    A cell is included when ``_is_indexable_cell`` accepts it. Each item's
    metadata also carries the display text of the other indexable cells in
    the same row and in the same column, joined by ``_context_text``.
    """
    book = _open_workbook(document_path)
    collected: list[IndexedItem] = []

    for sheet in book.worksheets:
        # Pass 1: remember every indexable cell and group the display texts
        # by row and by column so the context strings can be built afterwards.
        candidates: list[tuple[object, str | None, str]] = []
        texts_by_row: dict[int, list[tuple[str, str]]] = {}
        texts_by_column: dict[int, list[tuple[str, str]]] = {}

        for sheet_row in sheet.iter_rows():
            for current in sheet_row:
                if _is_indexable_cell(current):
                    formula_text = _formula_text(current)
                    shown = _display_text(current)
                    candidates.append((current, formula_text, shown))
                    texts_by_row.setdefault(current.row, []).append(
                        (current.coordinate, shown)
                    )
                    texts_by_column.setdefault(current.column, []).append(
                        (current.coordinate, shown)
                    )

        # Pass 2: emit the items now that full row/column context is known.
        for current, formula_text, shown in candidates:
            identifier = make_item_id(sheet.title, current.coordinate)
            details = {
                "sheet_name": sheet.title,
                "coordinate": current.coordinate,
                "raw_value": _metadata_raw_value(current.value),
                "formula": formula_text,
                "display_text": shown,
                "data_type": current.data_type,
                "row_context": _context_text(
                    texts_by_row[current.row], exclude=current.coordinate
                ),
                "column_context": _context_text(
                    texts_by_column[current.column], exclude=current.coordinate
                ),
            }
            collected.append(
                IndexedItem(
                    item_id=identifier,
                    item_type="cell",
                    locator=identifier,
                    preview=shown[:120],
                    content_text=shown,
                    metadata=details,
                )
            )

    return collected
142
+
143
+
144
def build_embedding_text(item: IndexedItem, document_path: Path) -> str:
    """Render the text that is embedded for a single indexed cell.

    The result is six newline-separated ``Label: value`` lines built from the
    workbook file name and the item's metadata. Missing metadata keys render
    as empty strings, except the value line, which falls back to
    ``item.content_text``.
    """
    details = item.metadata
    workbook_name = document_path.name
    sheet = details.get("sheet_name", "")
    coordinate = details.get("coordinate", "")
    row_context = details.get("row_context", "")
    column_context = details.get("column_context", "")
    shown = details.get("display_text", item.content_text)

    lines = (
        f"Workbook: {workbook_name}",
        f"Sheet: {sheet}",
        f"Cell: {coordinate}",
        f"Row Context: {row_context}",
        f"Column Context: {column_context}",
        f"Value: {shown}",
    )
    return "\n".join(lines)
156
+
157
+
158
def build_row_embeddings(
    items: list[IndexedItem], document_path: Path
) -> list[XlsxRowEmbedding]:
    """Group text-like items by (sheet, row) and build one embedding per row.

    Items rejected by ``_is_text_like_item`` are ignored. Rows are emitted in
    sorted (sheet name, row number) order. Cells inside a row are ordered by
    ``_coordinate_sort_key``, and the representative cell is the one with the
    highest ``_representative_score``.
    """

    def coordinate_of(entry: IndexedItem) -> str:
        return str(entry.metadata.get("coordinate", ""))

    rows: dict[tuple[str, int], list[IndexedItem]] = {}
    for candidate in items:
        if _is_text_like_item(candidate):
            row_key = (
                str(candidate.metadata.get("sheet_name", "")),
                _row_number(coordinate_of(candidate)),
            )
            rows.setdefault(row_key, []).append(candidate)

    embeddings: list[XlsxRowEmbedding] = []
    for (sheet_name, row_number), members in sorted(rows.items()):
        in_order = sorted(
            members,
            key=lambda member: _coordinate_sort_key(coordinate_of(member)),
        )
        cells = tuple(
            XlsxRowEmbeddingCell(
                item_id=member.item_id,
                coordinate=coordinate_of(member),
                display_text=str(
                    member.metadata.get("display_text", member.content_text)
                ),
                preview=member.preview,
            )
            for member in in_order
        )
        best = max(
            in_order,
            key=lambda member: _representative_score(coordinate_of(member), member),
        )
        row_text = _build_row_embedding_text(
            document_path.name,
            sheet_name,
            row_number,
            cells,
        )
        embeddings.append(
            XlsxRowEmbedding(
                sheet_name=sheet_name,
                row_number=row_number,
                text=row_text,
                preview=best.preview,
                representative_item_id=best.item_id,
                contributing_cells=cells,
            )
        )

    return embeddings
213
+
214
+
215
def read_cell(document_path: Path, item_id: str) -> str:
    """Return the display text of the cell identified by ``item_id``."""
    return resolve_cell(document_path, item_id).display_text
218
+
219
+
220
def create_xlsx(output_path: Path, initial_sheet_name: str | None = None) -> Path:
    """Create a new workbook at ``output_path`` and return that path.

    The active sheet is titled ``initial_sheet_name``, or "Sheet1" when the
    name is ``None`` or empty.

    Raises:
        RuntimeError: if openpyxl could not be imported.
    """
    if Workbook is None:
        raise RuntimeError("openpyxl is required for XLSX operations.")

    title = initial_sheet_name if initial_sheet_name else "Sheet1"
    book = Workbook()
    book.active.title = title
    book.save(output_path)
    return output_path
227
+
228
+
229
def add_sheet(
    document_path: Path,
    name: str,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Add a worksheet called ``name`` and save the workbook.

    Returns the path that was written and the ``xlsx:sheet:<name>`` locator
    of the new sheet.

    Raises:
        InvalidArgumentsError: if ``name`` is blank or already in use.
    """
    book = _open_workbook(document_path)

    is_blank = not name.strip()
    if is_blank:
        raise InvalidArgumentsError("Worksheet name cannot be empty.")
    if name in book.sheetnames:
        raise InvalidArgumentsError(f"Worksheet {name!r} already exists.")

    book.create_sheet(title=name)
    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination, f"xlsx:sheet:{name}"
243
+
244
+
245
def write_cell(
    document_path: Path, item_id: str, value: object, output_path: Path | None = None
) -> Path:
    """Overwrite one cell with ``value`` and save the workbook.

    The value is first passed through ``_coerce_write_value``. The workbook
    is saved to ``output_path`` when given, otherwise back to
    ``document_path``; the saved path is returned.
    """
    book = _open_workbook(document_path)
    target = _resolve_cell(book, item_id)
    target.value = _coerce_write_value(value)

    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination
254
+
255
+
256
def add_row(
    document_path: Path,
    sheet_locator: str,
    values: list[object],
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Write ``values`` into the row after the sheet's last used row.

    Values are written left to right starting at column 1, each one passed
    through ``_coerce_write_value``. Returns the saved path and the
    ``xlsx:sheet:<name>:row:<n>`` locator of the new row.
    """
    book = _open_workbook(document_path)
    sheet_name = _sheet_name_from_locator(sheet_locator)
    sheet = _resolve_worksheet(book, sheet_name)
    new_row = _last_used_row(sheet) + 1

    column = 1
    for raw in values:
        coerced = _coerce_write_value(raw)
        sheet.cell(row=new_row, column=column).value = coerced
        column += 1

    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination, f"xlsx:sheet:{sheet_name}:row:{new_row}"
273
+
274
+
275
def append_cell(
    document_path: Path, item_id: str, text: str, output_path: Path | None = None
) -> Path:
    """Append ``text`` to a cell that is empty or holds a plain string.

    Raises:
        TargetNotAppendableError: if the cell is a formula cell or its value
            is neither ``None`` nor a ``str``.
    """
    book = _open_workbook(document_path)
    target = _resolve_cell(book, item_id)

    has_formula = _formula_text(target) is not None
    current = target.value
    holds_text_or_nothing = current is None or isinstance(current, str)
    if has_formula or not holds_text_or_nothing:
        raise TargetNotAppendableError("target not appendable; use write-cell")

    target.value = text if current is None else f"{current}{text}"

    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination
292
+
293
+
294
def resolve_cell(document_path: Path, item_id: str) -> ResolvedCell:
    """Load the workbook and describe the cell addressed by ``item_id``."""
    book = _open_workbook(document_path)
    found = _resolve_cell(book, item_id)

    owner_title = found.parent.title
    formula_text = _formula_text(found)
    shown = _display_text(found)
    return ResolvedCell(
        sheet_name=owner_title,
        coordinate=found.coordinate,
        raw_value=found.value,
        formula=formula_text,
        display_text=shown,
    )
304
+
305
+
306
def resolve_structure(document_path: Path) -> tuple[StructureSection, ...]:
    """Return one ``worksheet`` section per sheet in the workbook.

    Each section is anchored at the sheet's first indexable cell, or at "A1"
    when ``_first_indexable_cell`` finds none. The preview is the first 120
    characters of that cell's display text.
    """
    book = _open_workbook(document_path)

    def describe(sheet) -> StructureSection:
        first = _first_indexable_cell(sheet)
        if first is None:
            anchor_coordinate = "A1"
        else:
            anchor_coordinate = first.coordinate
        section_locator = make_item_id(sheet.title, anchor_coordinate)
        snippet = _display_text(first)[:120] if first is not None else ""
        return StructureSection(
            locator=section_locator,
            section_type="worksheet",
            preview=snippet,
            metadata={
                "sheet_name": sheet.title,
                "used_range": _format_range(_used_bounds(sheet)),
                "max_row": sheet.max_row,
                "max_column": sheet.max_column,
                "cell_count": _cell_count(sheet),
            },
        )

    return tuple([describe(sheet) for sheet in book.worksheets])
332
+
333
+
334
def get_section(
    document_path: Path, locator: str, *, cell_range: str | None = None
) -> SectionPayload:
    """Return the worksheet section addressed by a ``sheet:<name>!<cell>`` id.

    Only the sheet name from ``locator`` is used. The cells come from
    ``get_sheet_snapshot``, optionally restricted to ``cell_range``. The
    preview is the first non-empty display value in the snapshot.
    """
    sheet_name, _ = parse_item_id(locator)
    snapshot = get_sheet_snapshot(document_path, sheet_name, cell_range=cell_range)

    first_value = ""
    for snap_cell in snapshot.cells:
        if snap_cell.display_value:
            first_value = snap_cell.display_value
            break

    section_cells = tuple(
        XlsxSectionCell(
            locator=make_item_id(sheet_name, snap_cell.coordinate),
            coordinate=snap_cell.coordinate,
            row=snap_cell.row,
            column=snap_cell.column,
            display_value=snap_cell.display_value,
            formula=snap_cell.metadata.get("formula"),
            metadata=snap_cell.metadata,
        )
        for snap_cell in snapshot.cells
    )

    return SectionPayload(
        document=snapshot.document,
        locator=locator,
        section_type="worksheet",
        preview=first_value,
        metadata={**snapshot.metadata, "sheet_name": sheet_name},
        sheet_name=sheet_name,
        cells=section_cells,
    )
361
+
362
+
363
def read_node(document_path: Path, locator: str) -> tuple[str, str, dict[str, object]]:
    """Read one cell as a ``("cell", display_text, metadata)`` triple."""
    found = resolve_cell(document_path, locator)
    details: dict[str, object] = {
        "sheet_name": found.sheet_name,
        "coordinate": found.coordinate,
        "formula": found.formula,
        "raw_value": found.raw_value,
    }
    return "cell", found.display_text, details
375
+
376
+
377
def write_node(
    document_path: Path, locator: str, value: str, output_path: Path | None = None
) -> Path:
    """Write ``value`` to the cell at ``locator``; delegates to ``write_cell``."""
    saved_path = write_cell(document_path, locator, value, output_path)
    return saved_path
381
+
382
+
383
def get_workbook_structure(document_path: Path) -> WorkbookStructure:
    """Summarise every worksheet: position, title, preview and extents.

    The preview of a sheet is the first 120 characters of the first cell
    inside the used bounds whose stripped display text is non-empty. It is
    empty when the sheet has no used bounds or no such cell.
    """
    book = _open_workbook(document_path)

    def first_text(sheet, bounds) -> str:
        if bounds is None:
            return ""
        top, left, bottom, right = bounds
        for sheet_row in sheet.iter_rows(
            min_row=top,
            max_row=bottom,
            min_col=left,
            max_col=right,
        ):
            for current in sheet_row:
                stripped = _display_text(current).strip()
                if stripped:
                    return stripped[:120]
        return ""

    summaries: list[WorksheetSummary] = []
    for index, sheet in enumerate(book.worksheets):
        bounds = _used_bounds(sheet)
        snippet = first_text(sheet, bounds)
        summaries.append(
            WorksheetSummary(
                position=index,
                sheet_name=sheet.title,
                preview=snippet,
                metadata={
                    "used_range": _format_range(bounds),
                    "max_row": sheet.max_row,
                    "max_column": sheet.max_column,
                },
            )
        )

    return WorkbookStructure(
        document=_document_ref(document_path), sheets=tuple(summaries)
    )
421
+
422
+
423
def get_sheet_snapshot(
    document_path: Path,
    sheet_name: str,
    *,
    cell_range: str | None = None,
    start_cell: str | None = None,
    row_count: int | None = None,
    column_count: int | None = None,
) -> SheetSnapshot:
    """Capture the cells of one worksheet window as a ``SheetSnapshot``.

    The window is whatever ``_snapshot_bounds`` derives from the keyword
    arguments. When it returns ``None`` the snapshot has no cells and zero
    row/column counts.

    Raises:
        TargetNotFoundError: if the worksheet does not exist.
    """
    book = _open_workbook(document_path)
    sheet = _resolve_worksheet(book, sheet_name)
    window = _snapshot_bounds(
        sheet,
        cell_range=cell_range,
        start_cell=start_cell,
        row_count=row_count,
        column_count=column_count,
    )

    if window is None:
        captured: list[SheetCell] = []
        rows_spanned = 0
        columns_spanned = 0
    else:
        top, left, bottom, right = window
        captured = [
            SheetCell(
                coordinate=current.coordinate,
                row=current.row,
                column=current.column,
                display_value=_display_text(current),
                metadata={
                    "raw_value": _metadata_raw_value(current.value),
                    "formula": _formula_text(current),
                    "data_type": current.data_type,
                },
            )
            for sheet_row in sheet.iter_rows(
                min_row=top,
                max_row=bottom,
                min_col=left,
                max_col=right,
            )
            for current in sheet_row
        ]
        rows_spanned = window[2] - window[0] + 1
        columns_spanned = window[3] - window[1] + 1

    return SheetSnapshot(
        document=_document_ref(document_path),
        sheet_name=sheet.title,
        cells=tuple(captured),
        metadata={
            "range": _format_range(window),
            "row_count": rows_spanned,
            "column_count": columns_spanned,
        },
    )
476
+
477
+
478
def append_row(
    document_path: Path,
    sheet_name: str,
    *,
    values: list[object] | None = None,
    record: dict[str, object] | None = None,
    output_path: Path | None = None,
) -> tuple[Path, int, tuple[str, ...]]:
    """Append one row after the sheet's last used row and save the workbook.

    Exactly one of ``values`` (written positionally from column A) or
    ``record`` (written under the columns that ``_header_map`` assigns to
    each key) must be given. Returns the saved path, the row number that was
    written and the coordinates written, in write order.

    Raises:
        InvalidArgumentsError: if both or neither of ``values``/``record``
            are given, if the sheet has no header map for a record write, or
            if a record key is not in the header map.
    """
    values_missing = values is None
    record_missing = record is None
    if values_missing == record_missing:
        raise InvalidArgumentsError(
            "append_row requires exactly one of values or record."
        )

    book = _open_workbook(document_path)
    sheet = _resolve_worksheet(book, sheet_name)
    new_row = _last_used_row(sheet) + 1
    written: list[str] = []

    if not values_missing:
        for position, raw in enumerate(values, start=1):
            address = f"{get_column_letter(position)}{new_row}"
            sheet[address] = _coerce_write_value(raw)
            written.append(address)
    else:
        columns_by_header = _header_map(sheet)
        if not columns_by_header:
            raise InvalidArgumentsError(
                "append_row record writes require an existing header row in the worksheet."
            )
        for key, raw in record.items():
            if key not in columns_by_header:
                raise InvalidArgumentsError(
                    f"Unknown worksheet header for append_row: {key}"
                )
            address = f"{get_column_letter(columns_by_header[key])}{new_row}"
            sheet[address] = _coerce_write_value(raw)
            written.append(address)

    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination, new_row, tuple(written)
519
+
520
+
521
def write_table(
    document_path: Path,
    sheet_name: str,
    *,
    rows: list[list[object]] | None = None,
    records: list[dict[str, object]] | None = None,
    column_mapping: dict[str, str] | None = None,
    output_path: Path | None = None,
) -> tuple[Path, int, int]:
    """Write several rows below the sheet's last used row and save.

    Exactly one of ``rows`` (positional values starting at column 1) or
    ``records`` (dicts placed via ``_resolve_record_mapping``) must be given.
    Returns the saved path plus the first and last row numbers of the block.
    For an empty input the last row is one less than the first.

    Raises:
        InvalidArgumentsError: if both or neither of ``rows``/``records`` are
            given, or a record key has no resolved column.
    """
    rows_missing = rows is None
    records_missing = records is None
    if rows_missing == records_missing:
        raise InvalidArgumentsError(
            "write_table requires exactly one of rows or records."
        )

    book = _open_workbook(document_path)
    sheet = _resolve_worksheet(book, sheet_name)
    first_row = _last_used_row(sheet) + 1

    if not rows_missing:
        current_row = first_row
        for row_values in rows:
            for position, raw in enumerate(row_values, start=1):
                sheet.cell(
                    row=current_row,
                    column=position,
                ).value = _coerce_write_value(raw)
            current_row += 1
        last_row = first_row + len(rows) - 1
    else:
        column_for = _resolve_record_mapping(sheet, column_mapping)
        current_row = first_row
        for entry in records:
            for key, raw in entry.items():
                if key not in column_for:
                    raise InvalidArgumentsError(
                        f"Unknown worksheet mapping for write_table field: {key}"
                    )
                sheet.cell(
                    row=current_row,
                    column=column_for[key],
                ).value = _coerce_write_value(raw)
            current_row += 1
        last_row = first_row + len(records) - 1

    destination = _target_path(document_path, output_path)
    book.save(destination)
    return destination, first_row, last_row
564
+
565
+
566
def parse_item_id(item_id: str) -> tuple[str, str]:
    """Split a ``sheet:<name>!<coordinate>`` id into (sheet name, coordinate).

    The split happens at the last "!", so sheet names may themselves contain
    "!". The coordinate is returned in the normalised form produced by
    ``_normalize_coordinate``.

    Raises:
        InvalidArgumentsError: if the "sheet:" prefix or the "!" separator is
            missing, the sheet name is empty, or the coordinate is rejected.
    """
    prefix = "sheet:"
    if not item_id.startswith(prefix):
        raise InvalidArgumentsError(f"Unsupported XLSX item id: {item_id}")

    sheet_name, separator, coordinate = item_id[len(prefix):].rpartition("!")
    if not separator:
        raise InvalidArgumentsError(f"Invalid XLSX item id: {item_id}")
    if not sheet_name:
        raise InvalidArgumentsError(f"Invalid XLSX sheet name in item id: {item_id}")

    return sheet_name, _normalize_coordinate(coordinate)
579
+
580
+
581
def make_item_id(sheet_name: str, coordinate: str) -> str:
    """Compose the ``sheet:<name>!<coordinate>`` id for a cell.

    The coordinate is validated and normalised by ``_normalize_coordinate``.
    """
    normalized = _normalize_coordinate(coordinate)
    return f"sheet:{sheet_name}!{normalized}"
583
+
584
+
585
def style_cell_inline(
    document_path: Path,
    locator: str,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply an inline (font-level) style to a whole cell and save.

    Returns the saved path, the canonical v2 locator and a report holding the
    validated ``cleared_fields`` and the ``skipped_fields`` returned by
    ``_apply_xlsx_inline_style``.

    Raises:
        InvalidArgumentsError: if ``clear_fields`` names an unknown inline
            style field or the locator does not address a cell.
    """
    book = _open_workbook(document_path)
    canonical = to_v2_locator(locator, file_type="xlsx")
    target = _resolve_cell(book, _legacy_item_id_from_v2(canonical))

    to_clear = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    skipped = _apply_xlsx_inline_style(target, style, to_clear)

    destination = _target_path(document_path, output_path)
    book.save(destination)

    report: dict[str, object] = {
        "cleared_fields": to_clear,
        "skipped_fields": skipped,
    }
    return destination, canonical, report
604
+
605
+
606
def read_cell_fragments(document_path: Path, locator: str) -> TextContainerSnapshot:
    """Read a cell's text as styled inline fragments.

    Raises:
        TargetNotEditableError: if
            ``_ensure_partial_formatting_cell_supported`` rejects the cell.
    """
    book = _open_workbook(document_path)
    canonical = to_v2_locator(locator, file_type="xlsx")
    target = _resolve_cell(book, _legacy_item_id_from_v2(canonical))
    _ensure_partial_formatting_cell_supported(target, canonical)

    pieces = _read_xlsx_fragments(target)
    location = {"sheet_name": target.parent.title, "coordinate": target.coordinate}
    return TextContainerSnapshot(
        locator=canonical,
        object_type="cell",
        text=fragment_text(pieces),
        fragments=pieces,
        metadata=location,
    )
619
+
620
+
621
def write_cell_fragments(
    document_path: Path,
    locator: str,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, TextContainerSnapshot]:
    """Replace a cell's content with styled inline fragments and save.

    Returns the saved path, the canonical v2 locator and a snapshot built
    from the normalised fragments that were written.

    Raises:
        TargetNotEditableError: if
            ``_ensure_partial_formatting_cell_supported`` rejects the cell.
    """
    book = _open_workbook(document_path)
    canonical = to_v2_locator(locator, file_type="xlsx")
    target = _resolve_cell(book, _legacy_item_id_from_v2(canonical))
    _ensure_partial_formatting_cell_supported(target, canonical)

    pieces = normalize_fragments(fragments)
    _write_xlsx_fragments(target, pieces)

    destination = _target_path(document_path, output_path)
    book.save(destination)

    written = TextContainerSnapshot(
        locator=canonical,
        object_type="cell",
        text=fragment_text(pieces),
        fragments=pieces,
        metadata={"sheet_name": target.parent.title, "coordinate": target.coordinate},
    )
    return destination, canonical, written
643
+
644
+
645
def style_cell_range(
    document_path: Path,
    locator: str,
    text_range: VisibleTextRange,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Style a character range inside a cell's text and save.

    The cell is read as fragments, the style is applied to ``text_range``
    with ``apply_style_to_range``, and the result is written back through
    ``write_cell_fragments``. Returns the saved path, the canonical locator
    and a report with the cleared fields, the range and the rewritten text.
    """
    before = read_cell_fragments(document_path, locator)
    to_clear = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    restyled = apply_style_to_range(
        before.fragments, text_range, style=style, clear_fields=to_clear
    )

    destination, canonical, after = write_cell_fragments(
        document_path,
        locator,
        restyled,
        output_path=output_path,
    )

    report: dict[str, object] = {
        "cleared_fields": to_clear,
        "range": {"start": text_range.start, "end": text_range.end},
        "text": after.text,
    }
    return destination, canonical, report
673
+
674
+
675
def style_cell_block(
    document_path: Path,
    locator: str,
    style: BlockStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply a block-level style to a cell and save.

    Returns the saved path, the canonical v2 locator and a report holding the
    validated ``cleared_fields`` and the ``skipped_fields`` returned by
    ``_apply_xlsx_block_style``.

    Raises:
        InvalidArgumentsError: if ``clear_fields`` names an unknown block
            style field or the locator does not address a cell.
    """
    book = _open_workbook(document_path)
    canonical = to_v2_locator(locator, file_type="xlsx")
    target = _resolve_cell(book, _legacy_item_id_from_v2(canonical))

    to_clear = _normalize_clear_fields(clear_fields, _BLOCK_STYLE_FIELDS)
    skipped = _apply_xlsx_block_style(target, style, to_clear)

    destination = _target_path(document_path, output_path)
    book.save(destination)

    report: dict[str, object] = {
        "cleared_fields": to_clear,
        "skipped_fields": skipped,
    }
    return destination, canonical, report
694
+
695
+
696
def _open_workbook(document_path: Path):
    """Load the workbook at ``document_path`` with rich-text cells enabled.

    Raises:
        RuntimeError: if openpyxl could not be imported.
    """
    loader = load_workbook
    if loader is None:
        raise RuntimeError("openpyxl is required for XLSX operations.")
    workbook_file = str(document_path)
    return loader(workbook_file, rich_text=True)
700
+
701
+
702
def _document_ref(document_path: Path) -> DocumentRef:
    """Build the ``DocumentRef`` for an XLSX file on disk.

    The resolved path supplies the document id (POSIX form), the display name
    and the modification time, so the file must exist.
    """
    absolute = document_path.resolve()
    modified = absolute.stat().st_mtime
    return DocumentRef(
        document_id=absolute.as_posix(),
        path=absolute,
        file_type="xlsx",
        display_name=absolute.name,
        modified_time=modified,
    )
712
+
713
+
714
def _resolve_cell(workbook, item_id: str):
    """Return the openpyxl cell addressed by a ``sheet:<name>!<cell>`` id."""
    sheet_name, coordinate = parse_item_id(item_id)
    return _resolve_worksheet(workbook, sheet_name)[coordinate]
718
+
719
+
720
def _resolve_worksheet(workbook, sheet_name: str):
    """Look up a worksheet by title.

    Raises:
        TargetNotFoundError: if the workbook lookup raises ``KeyError``.
    """
    try:
        found = workbook[sheet_name]
    except KeyError as missing:
        raise TargetNotFoundError(
            f"Worksheet {sheet_name!r} does not exist in the workbook."
        ) from missing
    return found
727
+
728
+
729
def _is_indexable_cell(cell) -> bool:
    """Tell whether a cell has content worth indexing (a formula or a value)."""
    nothing_to_index = _formula_text(cell) is None and cell.value is None
    return not nothing_to_index
731
+
732
+
733
+ def _formula_text(cell) -> str | None:
734
+ if getattr(cell, "data_type", None) == "f" and cell.value is not None:
735
+ return str(cell.value)
736
+ return None
737
+
738
+
739
def _display_text(cell) -> str:
    """Return the text shown for a cell.

    Formula cells yield their formula text, empty cells yield "", and every
    other value is converted with ``str``.
    """
    formula_text = _formula_text(cell)
    if formula_text is not None:
        return formula_text
    if cell.value is None:
        return ""
    return str(cell.value)
744
+
745
+
746
def _metadata_raw_value(value: object) -> object:
    """Return ``value`` in a form suitable for item metadata.

    Rich-text values are flattened to ``str``; everything else passes
    through unchanged (including when openpyxl is unavailable).
    """
    is_rich_text = CellRichText is not None and isinstance(value, CellRichText)
    return str(value) if is_rich_text else value
750
+
751
+
752
+ def _coerce_value(value: str) -> object:
753
+ for converter in (int, float):
754
+ try:
755
+ return converter(value)
756
+ except ValueError:
757
+ continue
758
+ return value
759
+
760
+
761
def _coerce_write_value(value: object) -> object:
    """Prepare a value for writing into a cell.

    Strings go through ``_coerce_value``; every other value is returned
    unchanged.
    """
    return _coerce_value(value) if isinstance(value, str) else value
765
+
766
+
767
def _normalize_coordinate(coordinate: str) -> str:
    """Validate a cell coordinate and return it stripped and upper-cased.

    Args:
        coordinate: A single-cell reference such as "b7".

    Returns:
        The normalised coordinate, e.g. "B7".

    Raises:
        InvalidArgumentsError: if the coordinate is blank, cannot be parsed
            by openpyxl's ``coordinate_to_tuple``, or names a row below 1.
        RuntimeError: if openpyxl could not be imported.
    """
    normalized = coordinate.strip().upper()
    if not normalized:
        raise InvalidArgumentsError("Cell coordinate cannot be empty.")
    if coordinate_to_tuple is None:
        raise RuntimeError("openpyxl is required for XLSX operations.")
    try:
        row, _column = coordinate_to_tuple(normalized)
    except (ValueError, KeyError) as exc:
        # A bad row part raises ValueError; an unknown column part (e.g. "1",
        # "A-1", "ZZZZ1") can surface as KeyError from openpyxl's column
        # lookup. Both mean the coordinate is invalid.
        raise InvalidArgumentsError(
            f"Invalid XLSX cell coordinate: {coordinate}"
        ) from exc
    if row < 1:
        # "A0" parses, but worksheet rows start at 1.
        raise InvalidArgumentsError(f"Invalid XLSX cell coordinate: {coordinate}")
    return normalized
780
+
781
+
782
+ def _context_text(entries: list[tuple[str, str]], *, exclude: str) -> str:
783
+ return " | ".join(
784
+ display_text
785
+ for coordinate, display_text in entries
786
+ if coordinate != exclude and display_text
787
+ )
788
+
789
+
790
+ def _target_path(document_path: Path, output_path: Path | None) -> Path:
791
+ return document_path if output_path is None else output_path
792
+
793
+
794
def _sheet_name_from_locator(locator: str) -> str:
    """Extract the sheet name from a worksheet locator.

    The locator is converted to its v2 form and must parse into exactly
    three components beginning with ``("xlsx", "sheet")``.

    Raises:
        InvalidArgumentsError: if the locator does not have that shape.
    """
    canonical = to_v2_locator(locator, file_type="xlsx")
    parts = parse_locator(canonical).components
    is_sheet_locator = len(parts) == 3 and parts[:2] == ("xlsx", "sheet")
    if not is_sheet_locator:
        raise InvalidArgumentsError(f"Unsupported worksheet locator: {locator}")
    return parts[2]
800
+
801
+
802
def _legacy_item_id_from_v2(locator: str) -> str:
    """Translate a v2 cell locator into a ``sheet:<name>!<cell>`` item id.

    The locator must parse into exactly four components beginning with
    ``("xlsx", "sheet")``; the third and fourth are the sheet name and the
    coordinate.

    Raises:
        InvalidArgumentsError: if the locator does not have that shape.
    """
    parts = parse_locator(locator).components
    is_cell_locator = len(parts) == 4 and parts[:2] == ("xlsx", "sheet")
    if not is_cell_locator:
        raise InvalidArgumentsError(f"XLSX cell locator required: {locator}")
    return make_item_id(parts[2], parts[3])
807
+
808
+
809
def _ensure_partial_formatting_cell_supported(cell, locator: str) -> None:
    """Reject cells that cannot carry per-fragment (rich-text) formatting.

    Supported cells are empty cells, plain-string cells and rich-text cells.
    A cell inside a merged range is supported only when it is the range's
    first (anchor) cell, and the anchor must still pass the value checks.

    Args:
        cell: The openpyxl cell to check.
        locator: Locator text used in error messages.

    Raises:
        TargetNotEditableError: for formula cells, boolean cells, non-anchor
            cells of a merged range, and cells holding any other non-string
            value.
    """
    if _formula_text(cell) is not None:
        raise TargetNotEditableError(
            f"{locator} does not support partial formatting for formula cells."
        )
    if isinstance(cell.value, bool):
        raise TargetNotEditableError(
            f"{locator} does not support partial formatting for boolean cells."
        )

    merged_ranges = cell.parent.merged_cells.ranges
    anchors = {str(merged).split(":")[0] for merged in merged_ranges}
    if cell.coordinate not in anchors:
        for merged in merged_ranges:
            if cell.coordinate in merged:
                raise TargetNotEditableError(
                    f"{locator} does not support partial formatting for merged cells."
                )
    # No early return for anchors: they hold the merged range's value and
    # must pass the same value-type checks as any unmerged cell.

    if cell.value is None:
        return
    if CellRichText is not None and isinstance(cell.value, CellRichText):
        return
    if not isinstance(cell.value, str):
        raise TargetNotEditableError(
            f"{locator} does not support partial formatting for non-string cells."
        )
835
+
836
+
837
def _read_xlsx_fragments(cell) -> tuple[InlineFragment, ...]:
    """Convert a cell value into inline fragments.

    Empty cells give an empty tuple. Rich-text values give one fragment per
    part (plain string parts get a default style), passed through
    ``normalize_fragments``. Any other value gives a single unstyled fragment
    holding ``str(value)``.
    """
    content = cell.value
    if content is None:
        return ()

    is_rich_text = CellRichText is not None and isinstance(content, CellRichText)
    if not is_rich_text:
        return (InlineFragment(text=str(content), style=InlineStyle()),)

    pieces: list[InlineFragment] = []
    for part in content:
        if isinstance(part, str):
            piece = InlineFragment(text=part, style=InlineStyle())
        else:
            piece = InlineFragment(
                text=part.text,
                style=_inline_style_from_xlsx_font(part.font),
            )
        pieces.append(piece)
    return normalize_fragments(pieces)
855
+
856
+
857
def _write_xlsx_fragments(
    cell,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
) -> None:
    """Store inline fragments in a cell as a rich-text value.

    An empty fragment list sets the cell to "". Fragments whose style
    attributes are all ``None`` are written as plain strings; the rest are
    written as ``TextBlock`` parts with a font built by ``_xlsx_inline_font``.

    Raises:
        RuntimeError: if fragments must be written but openpyxl's rich-text
            classes are unavailable.
    """
    pieces = normalize_fragments(fragments)
    if not pieces:
        cell.value = ""
        return

    rich_text_ready = not (
        CellRichText is None or TextBlock is None or InlineFont is None
    )
    if not rich_text_ready:
        raise RuntimeError(
            "openpyxl rich-text support is required for XLSX partial formatting."
        )

    parts: list[object] = []
    for piece in pieces:
        unstyled = all(
            setting is None for setting in piece.style.__dict__.values()
        )
        if unstyled:
            parts.append(piece.text)
        else:
            parts.append(TextBlock(_xlsx_inline_font(piece.style), piece.text))
    cell.value = CellRichText(*parts)
876
+
877
+
878
def _inline_style_from_xlsx_font(font) -> InlineStyle:
    """Translate an openpyxl font object into an ``InlineStyle``.

    ``None`` gives a default style. The colour is reduced to its last six
    characters, taken from the colour itself when it is a string or from its
    ``rgb`` attribute otherwise. Underline is derived from ``font.u`` when
    that is set (anything other than "" or "none" counts as underlined), and
    from ``font.underline`` otherwise.
    """
    if font is None:
        return InlineStyle()

    hex_color = None
    if getattr(font, "color", None) not in {None, ""}:
        raw_color = font.color
        if isinstance(raw_color, str):
            hex_color = raw_color[-6:]
        elif getattr(raw_color, "rgb", None):
            hex_color = str(raw_color.rgb)[-6:]

    raw_underline = getattr(font, "u", None)
    if raw_underline is None:
        underlined = getattr(font, "underline", None)
    else:
        underlined = str(raw_underline).lower() not in {"", "none"}

    # Prefer the short attribute names and fall back to the long ones.
    is_bold = getattr(font, "b", getattr(font, "bold", None))
    is_italic = getattr(font, "i", getattr(font, "italic", None))
    family = getattr(font, "rFont", getattr(font, "name", None))

    return InlineStyle(
        bold=is_bold,
        italic=is_italic,
        underline=underlined,
        strike=getattr(font, "strike", None),
        font_name=family,
        font_size=getattr(font, "sz", None),
        font_color=hex_color,
    )
901
+
902
+
903
def _xlsx_inline_font(style: InlineStyle):
    """Build the openpyxl ``InlineFont`` that represents ``style``.

    A truthy ``underline`` becomes "single"; a falsy or unset one becomes
    ``None``. A font colour, when present, is passed through
    ``_normalize_hex_color``.
    """
    if style.font_color is None:
        color = None
    else:
        color = _normalize_hex_color(style.font_color)
    underline = "single" if style.underline else None

    return InlineFont(
        b=style.bold,
        i=style.italic,
        strike=style.strike,
        rFont=style.font_name,
        sz=style.font_size,
        color=color,
        u=underline,
    )
915
+
916
+
917
+ _INLINE_STYLE_FIELDS = frozenset(
918
+ {
919
+ "bold",
920
+ "italic",
921
+ "underline",
922
+ "strike",
923
+ "font_name",
924
+ "font_size",
925
+ "font_color",
926
+ "highlight",
927
+ }
928
+ )
929
+ _BLOCK_STYLE_FIELDS = frozenset(
930
+ {
931
+ "alignment",
932
+ "indent_level",
933
+ "left_indent",
934
+ "right_indent",
935
+ "spacing_before",
936
+ "spacing_after",
937
+ "line_spacing",
938
+ "wrap_text",
939
+ "vertical_alignment",
940
+ "fill_color",
941
+ "number_format",
942
+ }
943
+ )
944
+ _XLSX_ALIGNMENT_MAP = {
945
+ "left": "left",
946
+ "center": "center",
947
+ "right": "right",
948
+ "justify": "justify",
949
+ }
950
+ _XLSX_VERTICAL_ALIGNMENT_MAP = {
951
+ "top": "top",
952
+ "center": "center",
953
+ "bottom": "bottom",
954
+ }
955
+
956
+
957
+ def _normalize_clear_fields(
958
+ clear_fields: list[str] | tuple[str, ...],
959
+ allowed: frozenset[str],
960
+ ) -> tuple[str, ...]:
961
+ normalized: list[str] = []
962
+ seen: set[str] = set()
963
+ for field_name in clear_fields:
964
+ if field_name not in allowed:
965
+ raise InvalidArgumentsError(
966
+ f"Unknown style field in clear_fields: {field_name}"
967
+ )
968
+ if field_name not in seen:
969
+ normalized.append(field_name)
970
+ seen.add(field_name)
971
+ return tuple(normalized)
972
+
973
+
974
def _apply_xlsx_inline_style(
    cell, style: InlineStyle, clear_fields: tuple[str, ...]
) -> list[str]:
    """Apply inline style changes to a cell's font.

    For each supported field, a name listed in ``clear_fields`` resets the
    font attribute to ``None``; otherwise a non-``None`` value on ``style`` is
    written.  The work is done on a copy of ``cell.font`` which is assigned
    back to the cell only after every field has been processed.

    Returns:
        The names of requested fields that were not applied; here that can
        only be ``"highlight"``.
    """
    updated_font = copy(cell.font)
    to_clear = set(clear_fields)

    # Fields whose value needs translating before it is stored on the font.
    converters = {
        "underline": lambda flag: "single" if flag else None,
        "font_color": _normalize_hex_color,
    }
    # (InlineStyle field, font attribute) in the order they are applied.
    field_to_attr = (
        ("bold", "bold"),
        ("italic", "italic"),
        ("underline", "underline"),
        ("strike", "strike"),
        ("font_name", "name"),
        ("font_size", "sz"),
        ("font_color", "color"),
    )
    for field_name, font_attr in field_to_attr:
        if field_name in to_clear:
            setattr(updated_font, font_attr, None)
            continue
        requested = getattr(style, field_name)
        if requested is None:
            continue
        convert = converters.get(field_name)
        setattr(
            updated_font,
            font_attr,
            requested if convert is None else convert(requested),
        )

    # Highlight is never written to the font; report it when it was requested.
    not_applied: list[str] = []
    if "highlight" in to_clear or style.highlight is not None:
        not_applied.append("highlight")

    cell.font = updated_font
    return not_applied
1021
+
1022
+
1023
def _apply_xlsx_block_style(
    cell, style: BlockStyle, clear_fields: tuple[str, ...]
) -> list[str]:
    """Apply block-level style changes (alignment, fill, number format).

    Alignment changes are made on a copy of ``cell.alignment`` that is
    assigned back at the end.  Fill and number format are written to the cell
    directly.  A name listed in ``clear_fields`` resets the corresponding
    setting; otherwise a non-``None`` value on ``style`` is applied.

    Returns:
        The names of requested fields that were not applied: any of the
        indent/spacing fields that were set on ``style`` or listed in
        ``clear_fields``.
    """
    new_alignment = copy(cell.alignment)
    to_clear = set(clear_fields)

    # (BlockStyle field, alignment attribute, value when cleared, converter)
    alignment_plan = (
        ("alignment", "horizontal", None, _xlsx_alignment_value),
        ("wrap_text", "wrap_text", None, None),
        ("vertical_alignment", "vertical", None, _xlsx_vertical_alignment_value),
        ("indent_level", "indent", 0, None),
    )
    for field_name, attr_name, cleared_value, convert in alignment_plan:
        if field_name in to_clear:
            setattr(new_alignment, attr_name, cleared_value)
            continue
        requested = getattr(style, field_name)
        if requested is None:
            continue
        setattr(
            new_alignment,
            attr_name,
            requested if convert is None else convert(requested),
        )

    # These fields are only reported back, never written to the cell.
    not_applied = [
        field_name
        for field_name in (
            "left_indent",
            "right_indent",
            "spacing_before",
            "spacing_after",
            "line_spacing",
        )
        if getattr(style, field_name) is not None or field_name in to_clear
    ]

    if "fill_color" in to_clear:
        cell.fill = PatternFill(fill_type=None)
    elif style.fill_color is not None:
        rgb = _normalize_hex_color(style.fill_color)
        cell.fill = PatternFill(fill_type="solid", start_color=rgb, end_color=rgb)

    if "number_format" in to_clear:
        cell.number_format = "General"
    elif style.number_format is not None:
        cell.number_format = style.number_format

    cell.alignment = new_alignment
    return not_applied
1073
+
1074
+
1075
+ def _normalize_hex_color(value: str) -> str:
1076
+ normalized = value.strip().lstrip("#").upper()
1077
+ if len(normalized) != 6 or any(
1078
+ character not in "0123456789ABCDEF" for character in normalized
1079
+ ):
1080
+ raise InvalidArgumentsError(f"Invalid RGB hex color: {value}")
1081
+ return normalized
1082
+
1083
+
1084
def _xlsx_alignment_value(raw: str) -> str:
    """Map a horizontal alignment name to its ``_XLSX_ALIGNMENT_MAP`` value.

    The lookup ignores case and surrounding whitespace.

    Raises:
        InvalidArgumentsError: If the name is not in the map.
    """
    mapped = _XLSX_ALIGNMENT_MAP.get(raw.strip().lower())
    if mapped is None:
        raise InvalidArgumentsError(f"Unsupported XLSX alignment: {raw}")
    return mapped
1089
+
1090
+
1091
def _xlsx_vertical_alignment_value(raw: str) -> str:
    """Map a vertical alignment name to its ``_XLSX_VERTICAL_ALIGNMENT_MAP`` value.

    The lookup ignores case and surrounding whitespace.

    Raises:
        InvalidArgumentsError: If the name is not in the map.
    """
    mapped = _XLSX_VERTICAL_ALIGNMENT_MAP.get(raw.strip().lower())
    if mapped is None:
        raise InvalidArgumentsError(f"Unsupported XLSX vertical alignment: {raw}")
    return mapped
1096
+
1097
+
1098
def _is_text_like_item(item: IndexedItem) -> bool:
    """Report whether an indexed item holds free text rather than data.

    An item qualifies only if its display text is non-blank, it has no
    formula, its raw value is not a bool/int/float, the text does not match
    ``NUMERIC_TEXT_PATTERN``, and the text contains at least one letter.
    """
    info = item.metadata
    shown = str(info.get("display_text", item.content_text)).strip()
    if shown == "":
        return False

    raw = info.get("raw_value")
    looks_like_data = (
        info.get("formula") is not None
        or isinstance(raw, (bool, int, float))
        or bool(NUMERIC_TEXT_PATTERN.match(shown))
    )
    if looks_like_data:
        return False
    return any(map(str.isalpha, shown))
1114
+
1115
+
1116
def _row_number(coordinate: str) -> int:
    """Return the row component of a cell coordinate."""
    row_index, _column_index = _coordinate_sort_key(coordinate)
    return row_index
1119
+
1120
+
1121
def _coordinate_sort_key(coordinate: str) -> tuple[int, int]:
    """Convert a cell coordinate into a ``(row, column)`` tuple.

    The coordinate is normalised with ``_normalize_coordinate`` before the
    openpyxl availability check, so a normalisation error surfaces first.

    Raises:
        RuntimeError: If ``coordinate_to_tuple`` is unavailable (``None``).
    """
    cleaned = _normalize_coordinate(coordinate)
    if coordinate_to_tuple is not None:
        return coordinate_to_tuple(cleaned)
    raise RuntimeError("openpyxl is required for XLSX operations.")
1126
+
1127
+
1128
def _representative_score(coordinate: str, item: IndexedItem) -> tuple[int, int, int]:
    """Score a cell as ``(letter count, text length, -column)``.

    With tuple comparison, a higher score means more letters, then longer
    text, then a smaller column number.
    """
    text = str(item.metadata.get("display_text", item.content_text))
    column_index = _coordinate_sort_key(coordinate)[1]
    letter_total = sum(map(str.isalpha, text))
    return (letter_total, len(text), -column_index)
1133
+
1134
+
1135
def _build_row_embedding_text(
    workbook_name: str,
    sheet_name: str,
    row_number: int,
    contributing_cells: tuple[XlsxRowEmbeddingCell, ...],
) -> str:
    """Render the newline-separated text that describes one worksheet row.

    The output starts with workbook, sheet and row header lines and a
    ``Cells:`` marker, followed by one ``- <coordinate>: <text>`` line per
    contributing cell.
    """
    header_lines = (
        f"Workbook: {workbook_name}",
        f"Sheet: {sheet_name}",
        f"Row: {row_number}",
        "Cells:",
    )
    cell_lines = [
        f"- {entry.coordinate}: {entry.display_text}" for entry in contributing_cells
    ]
    return "\n".join([*header_lines, *cell_lines])
1151
+
1152
+
1153
+ def _used_bounds(worksheet) -> tuple[int, int, int, int] | None:
1154
+ used_cells = [
1155
+ cell
1156
+ for row in worksheet.iter_rows()
1157
+ for cell in row
1158
+ if _is_indexable_cell(cell)
1159
+ ]
1160
+ if not used_cells:
1161
+ return None
1162
+ min_row = min(cell.row for cell in used_cells)
1163
+ min_col = min(cell.column for cell in used_cells)
1164
+ max_row = max(cell.row for cell in used_cells)
1165
+ max_col = max(cell.column for cell in used_cells)
1166
+ return (min_row, min_col, max_row, max_col)
1167
+
1168
+
1169
def _snapshot_bounds(
    worksheet,
    *,
    cell_range: str | None,
    start_cell: str | None,
    row_count: int | None,
    column_count: int | None,
) -> tuple[int, int, int, int] | None:
    """Work out the ``(min_row, min_col, max_row, max_col)`` area to snapshot.

    Three mutually exclusive modes are supported:

    * ``cell_range`` -- an explicit range string parsed by ``range_boundaries``.
    * ``start_cell`` + ``row_count`` + ``column_count`` -- a window anchored
      at ``start_cell``.
    * neither -- the worksheet's used bounds, which may be ``None``.

    Raises:
        InvalidArgumentsError: If range and window inputs are mixed, if the
            window inputs are incomplete, or if ``row_count`` or
            ``column_count`` is smaller than 1.
        RuntimeError: If ``range_boundaries`` is unavailable (``None``).
    """
    window_requested = (
        start_cell is not None or row_count is not None or column_count is not None
    )

    if cell_range is not None:
        if window_requested:
            raise InvalidArgumentsError(
                "sheet snapshot range and window inputs are mutually exclusive."
            )
        if range_boundaries is None:
            raise RuntimeError("openpyxl is required for XLSX operations.")
        # range_boundaries is column-first; the result here is row-first.
        min_col, min_row, max_col, max_row = range_boundaries(cell_range)
        return (min_row, min_col, max_row, max_col)

    if window_requested:
        if start_cell is None or row_count is None or column_count is None:
            raise InvalidArgumentsError(
                "sheet snapshot windows require start_cell, row_count, and column_count together."
            )
        # A count below 1 would give inverted bounds (max before min).
        if row_count < 1 or column_count < 1:
            raise InvalidArgumentsError(
                "sheet snapshot windows require row_count and column_count of at least 1."
            )
        start_row, start_column = _coordinate_sort_key(start_cell)
        return (
            start_row,
            start_column,
            start_row + row_count - 1,
            start_column + column_count - 1,
        )

    return _used_bounds(worksheet)
1201
+
1202
+
1203
+ def _format_range(bounds: tuple[int, int, int, int] | None) -> str | None:
1204
+ if bounds is None or get_column_letter is None:
1205
+ return None
1206
+ min_row, min_col, max_row, max_col = bounds
1207
+ return (
1208
+ f"{get_column_letter(min_col)}{min_row}:{get_column_letter(max_col)}{max_row}"
1209
+ )
1210
+
1211
+
1212
def _last_used_row(worksheet) -> int:
    """Return the highest row holding an indexable cell, or 0 if there is none."""
    used = _used_bounds(worksheet)
    if used is None:
        return 0
    _top, _left, bottom, _right = used
    return bottom
1215
+
1216
+
1217
+ def _header_map(worksheet) -> dict[str, int]:
1218
+ header_map: dict[str, int] = {}
1219
+ for cell in worksheet[1]:
1220
+ header = _display_text(cell).strip()
1221
+ if header:
1222
+ header_map[header] = cell.column
1223
+ return header_map
1224
+
1225
+
1226
def _resolve_record_mapping(
    worksheet, column_mapping: dict[str, str] | None
) -> dict[str, int]:
    """Resolve record field names to worksheet column numbers.

    With no ``column_mapping`` the row-1 header map is returned as is.
    Otherwise each mapping target is stripped and resolved, in this order:

    1. as a header found in row 1;
    2. as a purely alphabetic column reference (case-insensitive).

    Raises:
        InvalidArgumentsError: If a target is neither a known header nor a
            column reference that ``coordinate_to_tuple`` can resolve.
        RuntimeError: If ``coordinate_to_tuple`` is unavailable (``None``).
    """
    header_map = _header_map(worksheet)
    if column_mapping is None:
        return header_map

    resolved: dict[str, int] = {}
    for field_name, target in column_mapping.items():
        normalized_target = target.strip()
        if normalized_target in header_map:
            resolved[field_name] = header_map[normalized_target]
            continue
        if _is_column_reference(normalized_target):
            if coordinate_to_tuple is None:
                raise RuntimeError("openpyxl is required for XLSX operations.")
            try:
                _, column_number = coordinate_to_tuple(f"{normalized_target.upper()}1")
            except (KeyError, ValueError) as error:
                # An alphabetic string that is not a valid column (e.g. a
                # misspelt header) would otherwise leak a raw lookup error.
                raise InvalidArgumentsError(
                    f"Unknown worksheet header in column_mapping: {target}"
                ) from error
            resolved[field_name] = column_number
            continue
        raise InvalidArgumentsError(
            f"Unknown worksheet header in column_mapping: {target}"
        )
    return resolved
1249
+
1250
+
1251
+ def _is_column_reference(value: str) -> bool:
1252
+ return bool(re.fullmatch(r"[A-Za-z]+", value))
1253
+
1254
+
1255
+ def _first_indexable_cell(worksheet):
1256
+ for row in worksheet.iter_rows():
1257
+ for cell in row:
1258
+ if _is_indexable_cell(cell):
1259
+ return cell
1260
+ return None
1261
+
1262
+
1263
+ def _cell_count(worksheet) -> int:
1264
+ return sum(
1265
+ 1 for row in worksheet.iter_rows() for cell in row if _is_indexable_cell(cell)
1266
+ )