PyPI - exstruct - Versions diffs - 0.2.80__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

exstruct 0.2.80py3-none-any.whl → 0.3.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

exstruct/__init__.py +23 -12
exstruct/cli/main.py +20 -0
exstruct/core/backends/__init__.py +7 -0
exstruct/core/backends/base.py +42 -0
exstruct/core/backends/com_backend.py +230 -0
exstruct/core/backends/openpyxl_backend.py +191 -0
exstruct/core/cells.py +999 -483
exstruct/core/charts.py +243 -241
exstruct/core/integrate.py +42 -375
exstruct/core/logging_utils.py +16 -0
exstruct/core/modeling.py +87 -0
exstruct/core/pipeline.py +749 -0
exstruct/core/ranges.py +48 -0
exstruct/core/shapes.py +282 -36
exstruct/core/workbook.py +114 -0
exstruct/engine.py +51 -123
exstruct/errors.py +12 -1
exstruct/io/__init__.py +130 -138
exstruct/io/serialize.py +112 -0
exstruct/models/__init__.py +58 -8
exstruct/render/__init__.py +3 -7
{exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/METADATA +133 -18
exstruct-0.3.2.dist-info/RECORD +30 -0
exstruct-0.2.80.dist-info/RECORD +0 -20
{exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/WHEEL +0 -0
{exstruct-0.2.80.dist-info → exstruct-0.3.2.dist-info}/entry_points.txt +0 -0

exstruct/core/cells.py CHANGED Viewed

@@ -1,31 +1,33 @@
 from __future__ import annotations
-from collections import deque
-from collections.abc import Sequence
-from decimal import Decimal, InvalidOperation
-import logging
-from pathlib import Path
-import re
-import numpy as np
-from openpyxl import load_workbook
-from openpyxl.utils import get_column_letter, range_boundaries
-from openpyxl.worksheet.worksheet import Worksheet
-import pandas as pd
-import xlwings as xw
-from ..models import CellRow
-logger = logging.getLogger(__name__)
-_warned_keys: set[str] = set()
-XL_LINESTYLE_NONE = -4142
-XL_INSIDE_VERTICAL = 11
-XL_INSIDE_HORIZONTAL = 12
-XL_EDGE_LEFT = 7
-XL_EDGE_TOP = 8
-XL_EDGE_BOTTOM = 9
-XL_EDGE_RIGHT = 10
-MatrixInput = Sequence[Sequence[object]] | Sequence[object]
+from collections import deque
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+from decimal import Decimal, InvalidOperation
+import logging
+from pathlib import Path
+import re
+import numpy as np
+from openpyxl.styles.colors import Color
+from openpyxl.utils import get_column_letter, range_boundaries
+from openpyxl.worksheet.worksheet import Worksheet
+import pandas as pd
+import xlwings as xw
+from ..models import CellRow, MergedCell
+from .workbook import openpyxl_workbook
+logger = logging.getLogger(__name__)
+_warned_keys: set[str] = set()
+XL_LINESTYLE_NONE = -4142
+XL_INSIDE_VERTICAL = 11
+XL_INSIDE_HORIZONTAL = 12
+XL_EDGE_LEFT = 7
+XL_EDGE_TOP = 8
+XL_EDGE_BOTTOM = 9
+XL_EDGE_RIGHT = 10
+MatrixInput = Sequence[Sequence[object]] | Sequence[object]
 # Detection tuning parameters (can be overridden via set_table_detection_params)
 _DETECTION_CONFIG = {
@@ -34,6 +36,426 @@ _DETECTION_CONFIG = {
     "coverage_min": 0.2,
     "min_nonempty_cells": 3,
 }
+_DEFAULT_BACKGROUND_HEX = "FFFFFF"
+_XL_COLOR_NONE = -4142
+# Use dataclasses for lightweight models
+@dataclass(frozen=True)
+class SheetColorsMap:
+    """Background color map for a single worksheet."""
+    sheet_name: str
+    colors_map: dict[str, list[tuple[int, int]]]
+@dataclass(frozen=True)
+class WorkbookColorsMap:
+    """Background color maps for all worksheets in a workbook."""
+    sheets: dict[str, SheetColorsMap]
+    def get_sheet(self, sheet_name: str) -> SheetColorsMap | None:
+        """Return the colors map for a sheet if available.
+        Args:
+            sheet_name: Target worksheet name.
+        Returns:
+            SheetColorsMap for the sheet, or None if missing.
+        """
+        return self.sheets.get(sheet_name)
+def extract_sheet_colors_map(
+    file_path: Path, *, include_default_background: bool, ignore_colors: set[str] | None
+) -> WorkbookColorsMap:
+    """Extract background colors for each worksheet.
+    Args:
+        file_path: Excel workbook path.
+        include_default_background: Whether to include default (white) backgrounds
+            within the used range.
+        ignore_colors: Optional set of color keys to ignore.
+    Returns:
+        WorkbookColorsMap containing per-sheet color maps.
+    """
+    sheets: dict[str, SheetColorsMap] = {}
+    with openpyxl_workbook(file_path, data_only=True, read_only=False) as wb:
+        for ws in wb.worksheets:
+            sheet_map = _extract_sheet_colors(
+                ws, include_default_background, ignore_colors
+            )
+            sheets[ws.title] = sheet_map
+    return WorkbookColorsMap(sheets=sheets)
+def extract_sheet_colors_map_com(
+    workbook: xw.Book,
+    *,
+    include_default_background: bool,
+    ignore_colors: set[str] | None,
+) -> WorkbookColorsMap:
+    """Extract background colors for each worksheet via COM display formats.
+    Args:
+        workbook: xlwings workbook instance.
+        include_default_background: Whether to include default (white) backgrounds
+            within the used range.
+        ignore_colors: Optional set of color keys to ignore.
+    Returns:
+        WorkbookColorsMap containing per-sheet color maps.
+    """
+    _prepare_workbook_for_display_format(workbook)
+    sheets: dict[str, SheetColorsMap] = {}
+    for sheet in workbook.sheets:
+        _prepare_sheet_for_display_format(sheet)
+        sheet_map = _extract_sheet_colors_com(
+            sheet, include_default_background, ignore_colors
+        )
+        sheets[sheet.name] = sheet_map
+    return WorkbookColorsMap(sheets=sheets)
+def _extract_sheet_colors(
+    ws: Worksheet, include_default_background: bool, ignore_colors: set[str] | None
+) -> SheetColorsMap:
+    """Extract background colors for a single worksheet.
+    Args:
+        ws: Target worksheet.
+        include_default_background: Whether to include default (white) backgrounds.
+        ignore_colors: Optional set of color keys to ignore.
+    Returns:
+        SheetColorsMap for the worksheet.
+    """
+    min_row, min_col, max_row, max_col = _get_used_range_bounds(ws)
+    colors_map: dict[str, list[tuple[int, int]]] = {}
+    if min_row > max_row or min_col > max_col:
+        return SheetColorsMap(sheet_name=ws.title, colors_map=colors_map)
+    ignore_set = _normalize_ignore_colors(ignore_colors)
+    for row in ws.iter_rows(
+        min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col
+    ):
+        for cell in row:
+            color_key = _resolve_cell_background(cell, include_default_background)
+            if color_key is None:
+                continue
+            normalized_key = _normalize_color_key(color_key)
+            if _should_ignore_color(normalized_key, ignore_set):
+                continue
+            colors_map.setdefault(normalized_key, []).append(
+                (cell.row, cell.col_idx - 1)
+            )
+    return SheetColorsMap(sheet_name=ws.title, colors_map=colors_map)
+def _extract_sheet_colors_com(
+    sheet: xw.Sheet, include_default_background: bool, ignore_colors: set[str] | None
+) -> SheetColorsMap:
+    """Extract background colors for a single worksheet via COM.
+    Args:
+        sheet: Target worksheet.
+        include_default_background: Whether to include default (white) backgrounds.
+        ignore_colors: Optional set of color keys to ignore.
+    Returns:
+        SheetColorsMap for the worksheet.
+    """
+    colors_map: dict[str, list[tuple[int, int]]] = {}
+    used = sheet.used_range
+    start_row = int(getattr(used, "row", 1))
+    start_col = int(getattr(used, "column", 1))
+    max_row = used.last_cell.row
+    max_col = used.last_cell.column
+    if max_row <= 0 or max_col <= 0:
+        return SheetColorsMap(sheet_name=sheet.name, colors_map=colors_map)
+    ignore_set = _normalize_ignore_colors(ignore_colors)
+    for row in range(start_row, max_row + 1):
+        for col in range(start_col, max_col + 1):
+            color_key = _resolve_cell_background_com(
+                sheet, row, col, include_default_background
+            )
+            if color_key is None:
+                continue
+            normalized_key = _normalize_color_key(color_key)
+            if _should_ignore_color(normalized_key, ignore_set):
+                continue
+            colors_map.setdefault(normalized_key, []).append((row, col - 1))
+    return SheetColorsMap(sheet_name=sheet.name, colors_map=colors_map)
+def _get_used_range_bounds(ws: Worksheet) -> tuple[int, int, int, int]:
+    """Return used range bounds for a worksheet.
+    Args:
+        ws: Target worksheet.
+    Returns:
+        Tuple of (min_row, min_col, max_row, max_col).
+    """
+    try:
+        if _is_effectively_empty_sheet(ws):
+            return 1, 1, 0, 0
+        dim = ws.calculate_dimension()
+        min_col, min_row, max_col, max_row = range_boundaries(dim)
+        return min_row, min_col, max_row, max_col
+    except Exception:
+        max_row = ws.max_row or 0
+        max_col = ws.max_column or 0
+        if max_row == 0 or max_col == 0:
+            return 1, 1, 0, 0
+        return 1, 1, max_row, max_col
+def _is_effectively_empty_sheet(ws: Worksheet) -> bool:
+    """Check whether a worksheet has no content or styling.
+    Args:
+        ws: Target worksheet.
+    Returns:
+        True if the sheet has no meaningful content or style, otherwise False.
+    """
+    if ws.max_row != 1 or ws.max_column != 1:
+        return False
+    cell = ws.cell(row=1, column=1)
+    return cell.value is None and not cell.has_style
+def _resolve_cell_background(
+    cell: object, include_default_background: bool
+) -> str | None:
+    """Resolve a cell's background color key.
+    Args:
+        cell: Worksheet cell object.
+        include_default_background: Whether to treat default fills as white.
+    Returns:
+        Normalized color key or None when excluded.
+    """
+    fill = getattr(cell, "fill", None)
+    if fill is None:
+        return _DEFAULT_BACKGROUND_HEX if include_default_background else None
+    pattern_type = getattr(fill, "patternType", None)
+    if pattern_type in (None, "none"):
+        return _DEFAULT_BACKGROUND_HEX if include_default_background else None
+    color_key = _resolve_fill_color_key(fill)
+    if color_key == _DEFAULT_BACKGROUND_HEX and not include_default_background:
+        return None
+    return color_key
+def _resolve_fill_color_key(fill: object) -> str | None:
+    """Normalize the foreground/background color of a fill.
+    Args:
+        fill: openpyxl fill object.
+    Returns:
+        Normalized color key or None when unavailable.
+    """
+    fg_color = getattr(fill, "fgColor", None)
+    if fg_color is not None:
+        fg_key = _color_to_key(fg_color)
+        if fg_key is not None:
+            return fg_key
+    bg_color = getattr(fill, "bgColor", None)
+    return _color_to_key(bg_color) if bg_color is not None else None
+def _resolve_cell_background_com(
+    sheet: xw.Sheet, row: int, col: int, include_default_background: bool
+) -> str | None:
+    """Resolve a cell's background color key via COM display format.
+    Args:
+        sheet: Target worksheet.
+        row: 1-based row index.
+        col: 1-based column index.
+        include_default_background: Whether to include default (white) backgrounds.
+    Returns:
+        Normalized color key or None when excluded.
+    """
+    color_value = _get_display_format_color(sheet, row, col)
+    if color_value is None:
+        return _DEFAULT_BACKGROUND_HEX if include_default_background else None
+    if color_value == _XL_COLOR_NONE:
+        return _DEFAULT_BACKGROUND_HEX if include_default_background else None
+    color_key = _excel_color_int_to_rgb_hex(color_value)
+    if color_key == _DEFAULT_BACKGROUND_HEX and not include_default_background:
+        return None
+    return color_key
+def _prepare_workbook_for_display_format(workbook: xw.Book) -> None:
+    """Prepare a workbook so DisplayFormat reflects conditional formatting.
+    Args:
+        workbook: xlwings workbook instance.
+    """
+    try:
+        # Force calculation to ensure DisplayFormat.Interior reflects conditional formatting rules
+        workbook.app.calculate()
+    except Exception:
+        return
+def _prepare_sheet_for_display_format(sheet: xw.Sheet) -> None:
+    """Prepare a sheet so DisplayFormat reflects conditional formatting.
+    Args:
+        sheet: Target worksheet.
+    """
+    try:
+        # Activate sheet so DisplayFormat is available
+        sheet.api.Activate()
+    except Exception:
+        return
+    try:
+        # Calculate to apply conditional formatting to DisplayFormat
+        sheet.api.Calculate()
+    except Exception:
+        return
+def _get_display_format_color(sheet: xw.Sheet, row: int, col: int) -> int | None:
+    """Read DisplayFormat.Interior.Color from COM.
+    Args:
+        sheet: Target worksheet.
+        row: 1-based row index.
+        col: 1-based column index.
+    Returns:
+        BGR integer color or None if unavailable.
+    """
+    try:
+        cell = sheet.api.Cells(row, col)
+        display_format = cell.DisplayFormat
+        interior = display_format.Interior
+        return int(interior.Color)
+    except Exception:
+        return None
+def _excel_color_int_to_rgb_hex(color_value: int) -> str:
+    """Convert an Excel color integer into an RGB hex string.
+    Args:
+        color_value: Excel color integer from COM.
+    Returns:
+        RGB hex string (uppercase).
+    """
+    red = color_value & 0xFF
+    green = (color_value >> 8) & 0xFF
+    blue = (color_value >> 16) & 0xFF
+    return f"{red:02X}{green:02X}{blue:02X}"
+def _normalize_color_key(color_key: str) -> str:
+    """Normalize a color key into a canonical representation.
+    Args:
+        color_key: Raw color key (hex or themed/indexed).
+    Returns:
+        Normalized color key.
+    """
+    trimmed = color_key.strip()
+    if not trimmed:
+        return ""
+    lowered = trimmed.lower()
+    if lowered.startswith(("theme:", "indexed:", "auto:")) or lowered == "auto":
+        return lowered
+    hex_key = trimmed.lstrip("#").upper()
+    if len(hex_key) == 8:
+        hex_key = hex_key[2:]
+    return hex_key
+def _normalize_ignore_colors(ignore_colors: set[str] | None) -> set[str]:
+    """Normalize ignore color keys.
+    Args:
+        ignore_colors: Optional set of color keys to ignore.
+    Returns:
+        Normalized set of color keys.
+    """
+    if not ignore_colors:
+        return set()
+    normalized = {_normalize_color_key(color) for color in ignore_colors}
+    return {color for color in normalized if color}
+def _should_ignore_color(color_key: str, ignore_colors: set[str]) -> bool:
+    """Check whether a color key should be ignored.
+    Args:
+        color_key: Normalized color key.
+        ignore_colors: Normalized ignore color set.
+    Returns:
+        True when the color key is ignored.
+    """
+    return color_key in ignore_colors
+def _color_to_key(color: Color | object) -> str | None:
+    """Convert an openpyxl color object into a normalized key.
+    Args:
+        color: openpyxl color object.
+    Returns:
+        Normalized color key string or None when unavailable.
+    """
+    rgb = getattr(color, "rgb", None)
+    if rgb:
+        return _normalize_rgb(str(rgb))
+    color_type = getattr(color, "type", None)
+    if color_type == "theme":
+        theme = getattr(color, "theme", None)
+        tint = getattr(color, "tint", None)
+        theme_id = "unknown" if theme is None else str(theme)
+        if tint is None:
+            return f"theme:{theme_id}"
+        return f"theme:{theme_id}:{tint}"
+    if color_type == "indexed":
+        indexed = getattr(color, "indexed", None)
+        if indexed is not None:
+            return f"indexed:{indexed}"
+    if color_type == "auto":
+        auto = getattr(color, "auto", None)
+        return "auto" if auto is None else f"auto:{auto}"
+    return None
+def _normalize_rgb(rgb: str) -> str:
+    """Normalize an RGB/ARGB string into 6-hex format.
+    Args:
+        rgb: Raw RGB/ARGB string from openpyxl.
+    Returns:
+        Normalized RGB hex string (uppercase, 6 chars when possible).
+    """
+    cleaned = rgb.strip().upper()
+    if cleaned.startswith("0X"):
+        cleaned = cleaned[2:]
+    if len(cleaned) == 8:
+        cleaned = cleaned[2:]
+    return cleaned
 def warn_once(key: str, message: str) -> None:
@@ -76,21 +498,21 @@ def extract_sheet_cells_with_links(file_path: Path) -> dict[str, list[CellRow]]:
         - Links are mapped by column index string (e.g., "0") to hyperlink.target.
     """
     cell_rows = extract_sheet_cells(file_path)
-    wb = load_workbook(file_path, data_only=True, read_only=False)
     links_by_sheet: dict[str, dict[int, dict[str, str]]] = {}
-    for ws in wb.worksheets:
-        sheet_links: dict[int, dict[str, str]] = {}
-        for row in ws.iter_rows():
-            for cell in row:
-                link = getattr(cell, "hyperlink", None)
-                target = getattr(link, "target", None) if link else None
-                if not target:
-                    continue
-                col_str = str(
-                    cell.col_idx - 1
-                )  # zero-based to align with extract_sheet_cells
-                sheet_links.setdefault(cell.row, {})[col_str] = target
-        links_by_sheet[ws.title] = sheet_links
+    with openpyxl_workbook(file_path, data_only=True, read_only=False) as wb:
+        for ws in wb.worksheets:
+            sheet_links: dict[int, dict[str, str]] = {}
+            for row in ws.iter_rows():
+                for cell in row:
+                    link = getattr(cell, "hyperlink", None)
+                    target = getattr(link, "target", None) if link else None
+                    if not target:
+                        continue
+                    col_str = str(
+                        cell.col_idx - 1
+                    )  # zero-based to align with extract_sheet_cells
+                    sheet_links.setdefault(cell.row, {})[col_str] = target
+            links_by_sheet[ws.title] = sheet_links
     merged: dict[str, list[CellRow]] = {}
     for sheet_name, rows in cell_rows.items():
@@ -104,32 +526,67 @@ def extract_sheet_cells_with_links(file_path: Path) -> dict[str, list[CellRow]]:
     return merged
-def shrink_to_content(  # noqa: C901
-    sheet: xw.Sheet,
-    top: int,
-    left: int,
-    bottom: int,
-    right: int,
-    require_inside_border: bool = False,
-    min_nonempty_ratio: float = 0.0,
-) -> tuple[int, int, int, int]:
-    """Trim a rectangle based on cell contents and optional border heuristics."""
+def extract_sheet_merged_cells(file_path: Path) -> dict[str, list[MergedCell]]:
+    """Extract merged cell ranges per sheet via openpyxl.
+    Args:
+        file_path: Excel workbook path.
+    Returns:
+        Mapping of sheet name to merged cell ranges.
+    """
+    merged_by_sheet: dict[str, list[MergedCell]] = {}
+    with openpyxl_workbook(file_path, data_only=True, read_only=False) as wb:
+        for ws in wb.worksheets:
+            merged_ranges = getattr(ws, "merged_cells", None)
+            if merged_ranges is None:
+                merged_by_sheet[ws.title] = []
+                continue
+            results: list[MergedCell] = []
+            for merged_range in getattr(merged_ranges, "ranges", []):
+                bounds = range_boundaries(str(merged_range))
+                min_col, min_row, max_col, max_row = bounds
+                cell_value = ws.cell(row=min_row, column=min_col).value
+                value_str = "" if cell_value is None else str(cell_value)
+                results.append(
+                    MergedCell(
+                        r1=min_row,
+                        c1=min_col - 1,
+                        r2=max_row,
+                        c2=max_col - 1,
+                        v=value_str,
+                    )
+                )
+            merged_by_sheet[ws.title] = results
+    return merged_by_sheet
+def shrink_to_content(  # noqa: C901
+    sheet: xw.Sheet,
+    top: int,
+    left: int,
+    bottom: int,
+    right: int,
+    require_inside_border: bool = False,
+    min_nonempty_ratio: float = 0.0,
+) -> tuple[int, int, int, int]:
+    """Trim a rectangle based on cell contents and optional border heuristics."""
     rng = sheet.range((top, left), (bottom, right))
     vals = rng.value
     if vals is None:
         vals = []
     if not isinstance(vals, list):
         vals = [[vals]]
-    elif vals and not isinstance(vals[0], list):
-        vals = [vals]
-    rows_n = len(vals)
-    cols_n = len(vals[0]) if rows_n else 0
-    def to_str(x: object) -> str:
-        return "" if x is None else str(x)
-    def is_empty_value(x: object) -> bool:
-        return to_str(x).strip() == ""
+    elif vals and not isinstance(vals[0], list):
+        vals = [vals]
+    rows_n = len(vals)
+    cols_n = len(vals[0]) if rows_n else 0
+    def to_str(x: object) -> str:
+        return "" if x is None else str(x)
+    def is_empty_value(x: object) -> bool:
+        return to_str(x).strip() == ""
     def row_empty(i: int) -> bool:
         return cols_n == 0 or all(is_empty_value(vals[i][j]) for j in range(cols_n))
@@ -149,11 +606,11 @@ def shrink_to_content(  # noqa: C901
         cnt = sum(1 for i in range(rows_n) if not is_empty_value(vals[i][j]))
         return cnt / rows_n
-    def column_has_inside_border(col_idx: int) -> bool:
-        if not require_inside_border:
-            return False
-        try:
-            for r in range(top, bottom + 1):
+    def column_has_inside_border(col_idx: int) -> bool:
+        if not require_inside_border:
+            return False
+        try:
+            for r in range(top, bottom + 1):
                 ls = (
                     sheet.api.Cells(r, left + col_idx)
                     .Borders(XL_INSIDE_VERTICAL)
@@ -231,22 +688,28 @@ def shrink_to_content(  # noqa: C901
             bottom -= 1
         else:
             break
-    return top, left, bottom, right
-def load_border_maps_xlsx(  # noqa: C901
-    xlsx_path: Path, sheet_name: str
-) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int]:
-    wb = load_workbook(xlsx_path, data_only=True, read_only=False)
-    if sheet_name not in wb.sheetnames:
-        wb.close()
-        raise KeyError(f"Sheet '{sheet_name}' not found in {xlsx_path}")
-    ws = wb[sheet_name]
-    try:
-        min_col, min_row, max_col, max_row = range_boundaries(ws.calculate_dimension())
-    except Exception:
-        min_col, min_row, max_col, max_row = 1, 1, ws.max_column or 1, ws.max_row or 1
+    return top, left, bottom, right
+def load_border_maps_xlsx(  # noqa: C901
+    xlsx_path: Path, sheet_name: str
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int]:
+    with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb:
+        if sheet_name not in wb.sheetnames:
+            raise KeyError(f"Sheet '{sheet_name}' not found in {xlsx_path}")
+        ws = wb[sheet_name]
+        try:
+            min_col, min_row, max_col, max_row = range_boundaries(
+                ws.calculate_dimension()
+            )
+        except Exception:
+            min_col, min_row, max_col, max_row = (
+                1,
+                1,
+                ws.max_column or 1,
+                ws.max_row or 1,
+            )
     shape = (max_row + 1, max_col + 1)
     has_border = np.zeros(shape, dtype=bool)
@@ -255,11 +718,11 @@ def load_border_maps_xlsx(  # noqa: C901
     left_edge = np.zeros(shape, dtype=bool)
     right_edge = np.zeros(shape, dtype=bool)
-    def edge_has_style(edge: object) -> bool:
-        if edge is None:
-            return False
-        style = getattr(edge, "style", None)
-        return style is not None and style != "none"
+    def edge_has_style(edge: object) -> bool:
+        if edge is None:
+            return False
+        style = getattr(edge, "style", None)
+        return style is not None and style != "none"
     for r in range(min_row, max_row + 1):
         for c in range(min_col, max_col + 1):
@@ -270,134 +733,133 @@ def load_border_maps_xlsx(  # noqa: C901
             t = edge_has_style(b.top)
             btm = edge_has_style(b.bottom)
-            left_border = edge_has_style(b.left)
-            rgt = edge_has_style(b.right)
-            if t or btm or left_border or rgt:
-                has_border[r, c] = True
-                if t:
-                    top_edge[r, c] = True
-                if btm:
-                    bottom_edge[r, c] = True
-                if left_border:
-                    left_edge[r, c] = True
-                if rgt:
-                    right_edge[r, c] = True
+            left_border = edge_has_style(b.left)
+            rgt = edge_has_style(b.right)
+            if t or btm or left_border or rgt:
+                has_border[r, c] = True
+                if t:
+                    top_edge[r, c] = True
+                if btm:
+                    bottom_edge[r, c] = True
+                if left_border:
+                    left_edge[r, c] = True
+                if rgt:
+                    right_edge[r, c] = True
-    wb.close()
     return has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col
-def _detect_border_clusters_numpy(
-    has_border: np.ndarray, min_size: int
-) -> list[tuple[int, int, int, int]]:
-    from scipy.ndimage import label
-    structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.uint8)
-    lbl, num = label(has_border.astype(np.uint8), structure=structure)
-    rects: list[tuple[int, int, int, int]] = []
-    for k in range(1, int(num) + 1):
-        ys, xs = np.where(lbl == k)
-        if int(len(ys)) < min_size:
-            continue
-        rects.append((int(ys.min()), int(xs.min()), int(ys.max()), int(xs.max())))
-    return rects
-def _detect_border_clusters_python(
-    has_border: np.ndarray, min_size: int
-) -> list[tuple[int, int, int, int]]:
-    h, w = has_border.shape
-    visited = np.zeros_like(has_border, dtype=bool)
-    rects: list[tuple[int, int, int, int]] = []
-    for r in range(h):
-        for c in range(w):
-            if not has_border[r, c] or visited[r, c]:
-                continue
-            q = deque([(r, c)])
-            visited[r, c] = True
-            ys = [r]
-            xs = [c]
-            while q:
-                yy, xx = q.popleft()
-                for dy, dx in ((1, 0), (-1, 0), (0, 1), (0, -1)):
-                    ny, nx = yy + dy, xx + dx
-                    if (
-                        0 <= ny < h
-                        and 0 <= nx < w
-                        and has_border[ny, nx]
-                        and not visited[ny, nx]
-                    ):
-                        visited[ny, nx] = True
-                        q.append((ny, nx))
-                        ys.append(ny)
-                        xs.append(nx)
-            if len(ys) >= min_size:
-                rects.append((min(ys), min(xs), max(ys), max(xs)))
-    return rects
-def detect_border_clusters(
-    has_border: np.ndarray, min_size: int = 4
-) -> list[tuple[int, int, int, int]]:
-    try:
-        return _detect_border_clusters_numpy(has_border, min_size)
-    except Exception:
-        warn_once(
-            "scipy-missing",
-            "scipy is not available. Falling back to pure-Python BFS for connected components, which may be significantly slower.",
-        )
-        return _detect_border_clusters_python(has_border, min_size)
-def _get_values_block(
-    ws: Worksheet, top: int, left: int, bottom: int, right: int
-) -> list[list[object]]:
-    vals: list[list[object]] = []
-    for row in ws.iter_rows(
-        min_row=top, max_row=bottom, min_col=left, max_col=right, values_only=True
-    ):
-        vals.append(list(row))
-    return vals
-def _ensure_matrix(matrix: MatrixInput) -> list[list[object]]:
-    rows_seq = list(matrix)
-    if not rows_seq:
-        return []
-    first = rows_seq[0]
-    if isinstance(first, Sequence) and not isinstance(first, (str, bytes, bytearray)):
-        normalized: list[list[object]] = []
-        for row in rows_seq:
-            if isinstance(row, Sequence) and not isinstance(
-                row, (str, bytes, bytearray)
-            ):
-                normalized.append(list(row))
-            else:
-                normalized.append([row])
-        return normalized
-    return [list(rows_seq)]
-def _table_density_metrics(matrix: MatrixInput) -> tuple[float, float]:
-    """
-    Given a 2D matrix (list of rows), return (density, coverage).
-    density: nonempty / total cells.
-    coverage: area of tight bounding box of nonempty cells divided by total area.
-    """
-    normalized = _ensure_matrix(matrix)
-    if not normalized:
-        return 0.0, 0.0
-    rows = len(normalized)
-    cols = len(normalized[0]) if rows else 0
-    if rows == 0 or cols == 0:
-        return 0.0, 0.0
-    nonempty_coords = []
-    for i, row in enumerate(normalized):
-        for j, v in enumerate(row):
-            if not (v is None or str(v).strip() == ""):
-                nonempty_coords.append((i, j))
+def _detect_border_clusters_numpy(
+    has_border: np.ndarray, min_size: int
+) -> list[tuple[int, int, int, int]]:
+    from scipy.ndimage import label
+    structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.uint8)
+    lbl, num = label(has_border.astype(np.uint8), structure=structure)
+    rects: list[tuple[int, int, int, int]] = []
+    for k in range(1, int(num) + 1):
+        ys, xs = np.where(lbl == k)
+        if int(len(ys)) < min_size:
+            continue
+        rects.append((int(ys.min()), int(xs.min()), int(ys.max()), int(xs.max())))
+    return rects
+def _detect_border_clusters_python(
+    has_border: np.ndarray, min_size: int
+) -> list[tuple[int, int, int, int]]:
+    h, w = has_border.shape
+    visited = np.zeros_like(has_border, dtype=bool)
+    rects: list[tuple[int, int, int, int]] = []
+    for r in range(h):
+        for c in range(w):
+            if not has_border[r, c] or visited[r, c]:
+                continue
+            q = deque([(r, c)])
+            visited[r, c] = True
+            ys = [r]
+            xs = [c]
+            while q:
+                yy, xx = q.popleft()
+                for dy, dx in ((1, 0), (-1, 0), (0, 1), (0, -1)):
+                    ny, nx = yy + dy, xx + dx
+                    if (
+                        0 <= ny < h
+                        and 0 <= nx < w
+                        and has_border[ny, nx]
+                        and not visited[ny, nx]
+                    ):
+                        visited[ny, nx] = True
+                        q.append((ny, nx))
+                        ys.append(ny)
+                        xs.append(nx)
+            if len(ys) >= min_size:
+                rects.append((min(ys), min(xs), max(ys), max(xs)))
+    return rects
+def detect_border_clusters(
+    has_border: np.ndarray, min_size: int = 4
+) -> list[tuple[int, int, int, int]]:
+    try:
+        return _detect_border_clusters_numpy(has_border, min_size)
+    except Exception:
+        warn_once(
+            "scipy-missing",
+            "scipy is not available. Falling back to pure-Python BFS for connected components, which may be significantly slower.",
+        )
+        return _detect_border_clusters_python(has_border, min_size)
+def _get_values_block(
+    ws: Worksheet, top: int, left: int, bottom: int, right: int
+) -> list[list[object]]:
+    vals: list[list[object]] = []
+    for row in ws.iter_rows(
+        min_row=top, max_row=bottom, min_col=left, max_col=right, values_only=True
+    ):
+        vals.append(list(row))
+    return vals
+def _ensure_matrix(matrix: MatrixInput) -> list[list[object]]:
+    rows_seq = list(matrix)
+    if not rows_seq:
+        return []
+    first = rows_seq[0]
+    if isinstance(first, Sequence) and not isinstance(first, str | bytes | bytearray):
+        normalized: list[list[object]] = []
+        for row in rows_seq:
+            if isinstance(row, Sequence) and not isinstance(
+                row, str | bytes | bytearray
+            ):
+                normalized.append(list(row))
+            else:
+                normalized.append([row])
+        return normalized
+    return [list(rows_seq)]
+def _table_density_metrics(matrix: MatrixInput) -> tuple[float, float]:
+    """
+    Given a 2D matrix (list of rows), return (density, coverage).
+    density: nonempty / total cells.
+    coverage: area of tight bounding box of nonempty cells divided by total area.
+    """
+    normalized = _ensure_matrix(matrix)
+    if not normalized:
+        return 0.0, 0.0
+    rows = len(normalized)
+    cols = len(normalized[0]) if rows else 0
+    if rows == 0 or cols == 0:
+        return 0.0, 0.0
+    nonempty_coords = []
+    for i, row in enumerate(normalized):
+        for j, v in enumerate(row):
+            if not (v is None or str(v).strip() == ""):
+                nonempty_coords.append((i, j))
     total = rows * cols
     if not nonempty_coords:
@@ -414,27 +876,29 @@ def _table_density_metrics(matrix: MatrixInput) -> tuple[float, float]:
     return density, coverage
-def _is_plausible_table(matrix: MatrixInput) -> bool:
-    """
-    Heuristic: require at least 2 rows and 2 cols with meaningful data.
-    - At least 2 rows have 2 以上の非空セル
-    - At least 2 columns have 2 以上の非空セル
-    """
-    normalized = _ensure_matrix(matrix)
-    if not normalized:
-        return False
-    rows = len(normalized)
-    cols = max((len(r) if isinstance(r, list) else 1) for r in normalized) if rows else 0
-    if rows < 2 or cols < 2:
-        return False
-    row_counts: list[int] = []
-    col_counts = [0] * cols
-    for r in normalized:
-        cnt = 0
-        for j in range(cols):
-            v = r[j] if j < len(r) else None
+def _is_plausible_table(matrix: MatrixInput) -> bool:
+    """
+    Heuristic: require at least 2 rows and 2 cols with meaningful data.
+    - At least 2 rows have 2 以上の非空セル
+    - At least 2 columns have 2 以上の非空セル
+    """
+    normalized = _ensure_matrix(matrix)
+    if not normalized:
+        return False
+    rows = len(normalized)
+    cols = (
+        max((len(r) if isinstance(r, list) else 1) for r in normalized) if rows else 0
+    )
+    if rows < 2 or cols < 2:
+        return False
+    row_counts: list[int] = []
+    col_counts = [0] * cols
+    for r in normalized:
+        cnt = 0
+        for j in range(cols):
+            v = r[j] if j < len(r) else None
             if not (v is None or str(v).strip() == ""):
                 cnt += 1
                 col_counts[j] += 1
@@ -445,7 +909,9 @@ def _is_plausible_table(matrix: MatrixInput) -> bool:
     return rows_with_two >= 2 and cols_with_two >= 2
-def _nonempty_clusters(matrix: Sequence[Sequence[object]]) -> list[tuple[int, int, int, int]]:
+def _nonempty_clusters(
+    matrix: Sequence[Sequence[object]],
+) -> list[tuple[int, int, int, int]]:
     """Return bounding boxes of connected components of nonempty cells (4-neighbor)."""
     if not matrix:
         return []
@@ -460,12 +926,12 @@ def _nonempty_clusters(matrix: Sequence[Sequence[object]]) -> list[tuple[int, in
     visited = [[False] * cols for _ in range(rows)]
     boxes: list[tuple[int, int, int, int]] = []
-    def bfs(sr: int, sc: int) -> tuple[int, int, int, int]:
-        q = deque([(sr, sc)])
-        visited[sr][sc] = True
-        ys = [sr]
-        xs = [sc]
-        while q:
+    def bfs(sr: int, sc: int) -> tuple[int, int, int, int]:
+        q = deque([(sr, sc)])
+        visited[sr][sc] = True
+        ys = [sr]
+        xs = [sc]
+        while q:
             r, c = q.popleft()
             for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
                 nr, nc = r + dr, c + dc
@@ -488,17 +954,17 @@ def _nonempty_clusters(matrix: Sequence[Sequence[object]]) -> list[tuple[int, in
     return boxes
-def _normalize_matrix(matrix: object) -> list[list[object]]:
-    if matrix is None:
-        return []
-    if isinstance(matrix, list):
-        return _ensure_matrix(matrix)
-    if isinstance(matrix, Sequence) and not isinstance(matrix, (str, bytes, bytearray)):
-        return _ensure_matrix(matrix)
-    return [[matrix]]
+def _normalize_matrix(matrix: object) -> list[list[object]]:
+    if matrix is None:
+        return []
+    if isinstance(matrix, list):
+        return _ensure_matrix(matrix)
+    if isinstance(matrix, Sequence) and not isinstance(matrix, str | bytes | bytearray):
+        return _ensure_matrix(matrix)
+    return [[matrix]]
-def _header_like_row(row: list[object]) -> bool:
+def _header_like_row(row: list[object]) -> bool:
     nonempty = [v for v in row if not (v is None or str(v).strip() == "")]
     if len(nonempty) < 2:
         return False
@@ -513,19 +979,21 @@ def _header_like_row(row: list[object]) -> bool:
     return str_like >= num_like and str_like >= 1
-def _table_signal_score(matrix: Sequence[Sequence[object]]) -> float:
-    normalized = _ensure_matrix(matrix)
-    density, coverage = _table_density_metrics(normalized)
-    header = any(_header_like_row(r) for r in normalized[:2])  # check first 2 rows
+def _table_signal_score(matrix: Sequence[Sequence[object]]) -> float:
+    normalized = _ensure_matrix(matrix)
+    density, coverage = _table_density_metrics(normalized)
+    header = any(_header_like_row(r) for r in normalized[:2])  # check first 2 rows
-    rows = len(normalized)
-    cols = max((len(r) if isinstance(r, list) else 1) for r in normalized) if rows else 0
-    row_counts: list[int] = []
-    col_counts = [0] * cols if cols else []
-    for r in normalized:
-        cnt = 0
-        for j in range(cols):
-            v = r[j] if j < len(r) else None
+    rows = len(normalized)
+    cols = (
+        max((len(r) if isinstance(r, list) else 1) for r in normalized) if rows else 0
+    )
+    row_counts: list[int] = []
+    col_counts = [0] * cols if cols else []
+    for r in normalized:
+        cnt = 0
+        for j in range(cols):
+            v = r[j] if j < len(r) else None
             if not (v is None or str(v).strip() == ""):
                 cnt += 1
                 if j < len(col_counts):
@@ -565,28 +1033,28 @@ def set_table_detection_params(
         _DETECTION_CONFIG["min_nonempty_cells"] = min_nonempty_cells
-def shrink_to_content_openpyxl(  # noqa: C901
-    ws: Worksheet,
-    top: int,
-    left: int,
-    bottom: int,
-    right: int,
-    require_inside_border: bool,
-    top_edge: np.ndarray,
-    bottom_edge: np.ndarray,
-    left_edge: np.ndarray,
-    right_edge: np.ndarray,
-    min_nonempty_ratio: float = 0.0,
-) -> tuple[int, int, int, int]:
-    vals = _get_values_block(ws, top, left, bottom, right)
-    rows_n = bottom - top + 1
-    cols_n = right - left + 1
-    def to_str(x: object) -> str:
-        return "" if x is None else str(x)
-    def is_empty_value(x: object) -> bool:
-        return to_str(x).strip() == ""
+def shrink_to_content_openpyxl(  # noqa: C901
+    ws: Worksheet,
+    top: int,
+    left: int,
+    bottom: int,
+    right: int,
+    require_inside_border: bool,
+    top_edge: np.ndarray,
+    bottom_edge: np.ndarray,
+    left_edge: np.ndarray,
+    right_edge: np.ndarray,
+    min_nonempty_ratio: float = 0.0,
+) -> tuple[int, int, int, int]:
+    vals = _get_values_block(ws, top, left, bottom, right)
+    rows_n = bottom - top + 1
+    cols_n = right - left + 1
+    def to_str(x: object) -> str:
+        return "" if x is None else str(x)
+    def is_empty_value(x: object) -> bool:
+        return to_str(x).strip() == ""
     def row_nonempty_ratio_local(i: int) -> float:
         if cols_n <= 0:
@@ -725,29 +1193,45 @@ def shrink_to_content_openpyxl(  # noqa: C901
     return top, left, bottom, right
-def detect_tables_xlwings(sheet: xw.Sheet) -> list[str]:  # noqa: C901
-    """Detect table-like ranges via COM: ListObjects first, then border clusters."""
-    tables: list[str] = []
+def _extract_listobject_tables(sheet: xw.Sheet) -> list[str]:
+    """Extract table ranges from Excel ListObjects via COM.
+    Args:
+        sheet: xlwings worksheet.
+    Returns:
+        List of table ranges as Excel A1 strings.
+    """
+    tables: list[str] = []
     try:
         for lo in sheet.api.ListObjects:
             rng = lo.Range
-            top_row = int(rng.Row)
-            left_col = int(rng.Column)
-            bottom_row = top_row + int(rng.Rows.Count) - 1
-            right_col = left_col + int(rng.Columns.Count) - 1
             addr = rng.Address(RowAbsolute=False, ColumnAbsolute=False)
             tables.append(addr)
-    except Exception:
-        pass
-    used = sheet.used_range
-    max_row = used.last_cell.row
-    max_col = used.last_cell.column
-    def cell_has_any_border(r: int, c: int) -> bool:
-        try:
-            b = sheet.api.Cells(r, c).Borders
-            for idx in (
+    except Exception:
+        pass
+    return tables
+def _detect_border_rectangles_xlwings(
+    sheet: xw.Sheet,
+) -> list[tuple[int, int, int, int]]:
+    """Detect bordered rectangles in a sheet using COM border inspection.
+    Args:
+        sheet: xlwings worksheet.
+    Returns:
+        List of rectangles as (top_row, left_col, bottom_row, right_col).
+    """
+    used = sheet.used_range
+    max_row = used.last_cell.row
+    max_col = used.last_cell.column
+    def cell_has_any_border(r: int, c: int) -> bool:
+        try:
+            b = sheet.api.Cells(r, c).Borders
+            for idx in (
                 XL_EDGE_LEFT,
                 XL_EDGE_TOP,
                 XL_EDGE_RIGHT,
@@ -768,211 +1252,243 @@ def detect_tables_xlwings(sheet: xw.Sheet) -> list[str]:  # noqa: C901
             return False
     grid = [[False] * (max_col + 1) for _ in range(max_row + 1)]
-    for r in range(1, max_row + 1):
-        for c in range(1, max_col + 1):
-            if cell_has_any_border(r, c):
-                grid[r][c] = True
-    visited = [[False] * (max_col + 1) for _ in range(max_row + 1)]
-    def dfs(sr: int, sc: int, acc: list[tuple[int, int]]) -> None:
-        stack = [(sr, sc)]
-        while stack:
-            rr, cc = stack.pop()
-            if not (1 <= rr <= max_row and 1 <= cc <= max_col):
-                continue
-            if visited[rr][cc] or not grid[rr][cc]:
-                continue
-            visited[rr][cc] = True
-            acc.append((rr, cc))
-            for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
-                stack.append((rr + dr, cc + dc))
-    clusters: list[tuple[int, int, int, int]] = []
     for r in range(1, max_row + 1):
         for c in range(1, max_col + 1):
-            if grid[r][c] and not visited[r][c]:
-                cluster: list[tuple[int, int]] = []
-                dfs(r, c, cluster)
-                if len(cluster) < 4:
-                    continue
-                rows = [rc[0] for rc in cluster]
-                cols = [rc[1] for rc in cluster]
-                top_row = min(rows)
-                bottom_row = max(rows)
-                left_col = min(cols)
-                right_col = max(cols)
-                clusters.append((top_row, left_col, bottom_row, right_col))
-    def overlaps_for_merge(
-        a: tuple[int, int, int, int], b: tuple[int, int, int, int]
-    ) -> bool:
-        # Do not merge if one rect fully contains the other (separate clusters like big frame vs small table)
-        contains = (
-            a[0] <= b[0] and a[1] <= b[1] and a[2] >= b[2] and a[3] >= b[3]
-        ) or (b[0] <= a[0] and b[1] <= a[1] and b[2] >= a[2] and b[3] >= a[3])
-        if contains:
-            return False
-        return not (a[1] > b[3] or a[3] < b[1] or a[0] > b[2] or a[2] < b[0])
+            if cell_has_any_border(r, c):
+                grid[r][c] = True
+    return _detect_border_rectangles(grid, min_size=4)
+def _detect_border_rectangles(
+    has_border: np.ndarray | Sequence[Sequence[bool]], *, min_size: int
+) -> list[tuple[int, int, int, int]]:
+    """Detect border rectangles from a boolean grid.
+    Args:
+        has_border: Boolean grid of border presence.
+        min_size: Minimum cluster size to keep.
+    Returns:
+        List of rectangles as (top_row, left_col, bottom_row, right_col).
+    """
+    return detect_border_clusters(np.asarray(has_border, dtype=bool), min_size=min_size)
+def _merge_rectangles(
+    rects: Sequence[tuple[int, int, int, int]],
+) -> list[tuple[int, int, int, int]]:
+    """Merge overlapping rectangles while preserving contained regions.
+    Args:
+        rects: Sequence of rectangles (top, left, bottom, right).
+    Returns:
+        Merged rectangles sorted by coordinates.
+    """
     merged_rects: list[tuple[int, int, int, int]] = []
-    for rect in sorted(clusters):
+    for rect in sorted(rects):
         merged = False
-        for i, ex in enumerate(merged_rects):
-            if overlaps_for_merge(rect, ex):
+        for i, existing in enumerate(merged_rects):
+            if _rectangles_overlap_for_merge(rect, existing):
                 merged_rects[i] = (
-                    min(rect[0], ex[0]),
-                    min(rect[1], ex[1]),
-                    max(rect[2], ex[2]),
-                    max(rect[3], ex[3]),
+                    min(rect[0], existing[0]),
+                    min(rect[1], existing[1]),
+                    max(rect[2], existing[2]),
+                    max(rect[3], existing[3]),
                 )
                 merged = True
                 break
         if not merged:
             merged_rects.append(rect)
+    return merged_rects
-    dedup: set[str] = set()
-    for top_row, left_col, bottom_row, right_col in merged_rects:
-        top_row, left_col, bottom_row, right_col = shrink_to_content(
-            sheet, top_row, left_col, bottom_row, right_col, require_inside_border=False
-        )
-        try:
-            rng_vals = sheet.range((top_row, left_col), (bottom_row, right_col)).value
-            rng_vals = _normalize_matrix(rng_vals)
-            nonempty = sum(
-                1
-                for row in rng_vals
-                for v in (row if isinstance(row, list) else [row])
-                if not (v is None or str(v).strip() == "")
-            )
-        except Exception:
-            nonempty = 0
-        if nonempty < _DETECTION_CONFIG["min_nonempty_cells"]:
+def _rectangles_overlap_for_merge(
+    a: tuple[int, int, int, int], b: tuple[int, int, int, int]
+) -> bool:
+    """Return True when rectangles should be merged.
+    Args:
+        a: First rectangle (top, left, bottom, right).
+        b: Second rectangle (top, left, bottom, right).
+    Returns:
+        True if rectangles overlap and neither fully contains the other.
+    """
+    contains = (a[0] <= b[0] and a[1] <= b[1] and a[2] >= b[2] and a[3] >= b[3]) or (
+        b[0] <= a[0] and b[1] <= a[1] and b[2] >= a[2] and b[3] >= a[3]
+    )
+    if contains:
+        return False
+    return not (a[1] > b[3] or a[3] < b[1] or a[0] > b[2] or a[2] < b[0])
+def _collect_table_candidates_from_values(
+    values: Sequence[Sequence[object]],
+    *,
+    base_top: int,
+    base_left: int,
+    col_name: Callable[[int], str],
+) -> list[str]:
+    """Collect table candidates from a normalized value matrix.
+    Args:
+        values: Normalized matrix of cell values.
+        base_top: Top row index of the matrix in worksheet coordinates (1-based).
+        base_left: Left column index of the matrix in worksheet coordinates (1-based).
+        col_name: Function to convert column index to Excel letters.
+    Returns:
+        List of detected table candidate range strings.
+    """
+    normalized = [list(row) for row in values]
+    nonempty = _count_nonempty_cells(normalized)
+    if nonempty < _DETECTION_CONFIG["min_nonempty_cells"]:
+        return []
+    results: list[str] = []
+    clusters = _nonempty_clusters(normalized)
+    for r0, c0, r1, c1 in clusters:
+        sub = [row[c0 : c1 + 1] for row in normalized[r0 : r1 + 1]]
+        density, coverage = _table_density_metrics(sub)
+        if (
+            density < _DETECTION_CONFIG["density_min"]
+            and coverage < _DETECTION_CONFIG["coverage_min"]
+        ):
             continue
-        clusters = _nonempty_clusters(rng_vals)
-        for r0, c0, r1, c1 in clusters:
-            sub = [row[c0 : c1 + 1] for row in rng_vals[r0 : r1 + 1]]
-            density, coverage = _table_density_metrics(sub)
-            if (
-                density < _DETECTION_CONFIG["density_min"]
-                and coverage < _DETECTION_CONFIG["coverage_min"]
-            ):
-                continue
-            if not _is_plausible_table(sub):
-                continue
-            score = _table_signal_score(sub)
-            if score < _DETECTION_CONFIG["table_score_threshold"]:
-                continue
-            addr = f"{xw.utils.col_name(left_col + c0)}{top_row + r0}:{xw.utils.col_name(left_col + c1)}{top_row + r1}"
-            if addr not in dedup:
-                dedup.add(addr)
-                tables.append(addr)
-    return tables
+        if not _is_plausible_table(sub):
+            continue
+        score = _table_signal_score(sub)
+        if score < _DETECTION_CONFIG["table_score_threshold"]:
+            continue
+        addr = (
+            f"{col_name(base_left + c0)}{base_top + r0}:"
+            f"{col_name(base_left + c1)}{base_top + r1}"
+        )
+        results.append(addr)
+    return results
+def _count_nonempty_cells(values: Sequence[Sequence[object]]) -> int:
+    """Count non-empty cells in a normalized matrix.
+    Args:
+        values: Normalized matrix of values.
-def detect_tables_openpyxl(  # noqa: C901
-    xlsx_path: Path, sheet_name: str
-) -> list[str]:
-    wb = load_workbook(
-        xlsx_path,
-        data_only=True,
-        read_only=False,
+    Returns:
+        Number of non-empty cells.
+    """
+    return sum(
+        1 for row in values for v in row if not (v is None or str(v).strip() == "")
     )
-    ws = wb[sheet_name]
-    tables: list[str] = []
-    try:
-        openpyxl_tables: list[object] = []
-        if hasattr(ws, "tables") and ws.tables:
-            if isinstance(ws.tables, dict):
-                openpyxl_tables = list(ws.tables.values())
-            else:
-                openpyxl_tables = list(ws.tables)
-        elif hasattr(ws, "_tables") and ws._tables:
-            openpyxl_tables = list(ws._tables)
-        for t in openpyxl_tables:
-            addr = getattr(t, "ref", None)
-            if addr:
-                tables.append(str(addr))
+def _extract_openpyxl_table_refs(ws: Worksheet) -> list[str]:
+    """Extract table reference strings from an openpyxl worksheet.
+    Args:
+        ws: Target worksheet.
+    Returns:
+        List of table reference strings.
+    """
+    tables: list[str] = []
+    try:
+        openpyxl_tables: list[object] = []
+        if hasattr(ws, "tables") and ws.tables:
+            if isinstance(ws.tables, dict):
+                openpyxl_tables = list(ws.tables.values())
+            else:
+                openpyxl_tables = list(ws.tables)
+        elif hasattr(ws, "_tables") and ws._tables:
+            openpyxl_tables = list(ws._tables)
+        for t in openpyxl_tables:
+            addr = getattr(t, "ref", None)
+            if addr:
+                tables.append(str(addr))
     except Exception:
         pass
+    return tables
-    has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col = (
-        load_border_maps_xlsx(xlsx_path, sheet_name)
-    )
-    rects = detect_border_clusters(has_border, min_size=4)
-    def overlaps_for_merge(
-        a: tuple[int, int, int, int], b: tuple[int, int, int, int]
-    ) -> bool:
-        contains = (
-            a[0] <= b[0] and a[1] <= b[1] and a[2] >= b[2] and a[3] >= b[3]
-        ) or (b[0] <= a[0] and b[1] <= a[1] and b[2] >= a[2] and b[3] >= a[3])
-        if contains:
-            return False
-        return not (a[1] > b[3] or a[3] < b[1] or a[0] > b[2] or a[2] < b[0])
-    merged_rects: list[tuple[int, int, int, int]] = []
-    for rect in sorted(rects):
-        merged = False
-        for i, ex in enumerate(merged_rects):
-            if overlaps_for_merge(rect, ex):
-                merged_rects[i] = (
-                    min(rect[0], ex[0]),
-                    min(rect[1], ex[1]),
-                    max(rect[2], ex[2]),
-                    max(rect[3], ex[3]),
-                )
-                merged = True
-                break
-        if not merged:
-            merged_rects.append(rect)
+def detect_tables_xlwings(sheet: xw.Sheet) -> list[str]:
+    """Detect table-like ranges via COM: ListObjects first, then border clusters."""
+    tables: list[str] = []
+    tables.extend(_extract_listobject_tables(sheet))
+    rects = _detect_border_rectangles_xlwings(sheet)
+    merged_rects = _merge_rectangles(rects)
+    dedup: set[str] = set(tables)
-    dedup: set[str] = set()
     for top_row, left_col, bottom_row, right_col in merged_rects:
-        top_row, left_col, bottom_row, right_col = shrink_to_content_openpyxl(
-            ws,
-            top_row,
-            left_col,
-            bottom_row,
-            right_col,
-            require_inside_border=False,
-            top_edge=top_edge,
-            bottom_edge=bottom_edge,
-            left_edge=left_edge,
-            right_edge=right_edge,
-            min_nonempty_ratio=0.0,
-        )
-        vals_block = _get_values_block(ws, top_row, left_col, bottom_row, right_col)
-        vals_block = _normalize_matrix(vals_block)
-        nonempty = sum(
-            1
-            for row in vals_block
-            for v in row
-            if not (v is None or str(v).strip() == "")
+        top_row, left_col, bottom_row, right_col = shrink_to_content(
+            sheet, top_row, left_col, bottom_row, right_col, require_inside_border=False
         )
-        if nonempty < _DETECTION_CONFIG["min_nonempty_cells"]:
+        rng_vals: object | None = None
+        try:
+            rng_vals = sheet.range((top_row, left_col), (bottom_row, right_col)).value
+        except Exception as exc:
+            logger.warning(
+                "Failed to read range for table detection (%s). (%r)",
+                sheet.name,
+                exc,
+            )
+        if rng_vals is None:
             continue
-        clusters = _nonempty_clusters(vals_block)
-        for r0, c0, r1, c1 in clusters:
-            sub = [row[c0 : c1 + 1] for row in vals_block[r0 : r1 + 1]]
-            density, coverage = _table_density_metrics(sub)
-            if (
-                density < _DETECTION_CONFIG["density_min"]
-                and coverage < _DETECTION_CONFIG["coverage_min"]
-            ):
-                continue
-            if not _is_plausible_table(sub):
-                continue
-            score = _table_signal_score(sub)
-            if score < _DETECTION_CONFIG["table_score_threshold"]:
-                continue
-            addr = f"{get_column_letter(left_col + c0)}{top_row + r0}:{get_column_letter(left_col + c1)}{top_row + r1}"
+        candidates = _collect_table_candidates_from_values(
+            _normalize_matrix(rng_vals),
+            base_top=top_row,
+            base_left=left_col,
+            col_name=xw.utils.col_name,
+        )
+        for addr in candidates:
             if addr not in dedup:
                 dedup.add(addr)
                 tables.append(addr)
-    wb.close()
     return tables
+def detect_tables_openpyxl(xlsx_path: Path, sheet_name: str) -> list[str]:
+    """Detect table-like ranges via openpyxl tables and border clusters."""
+    with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb:
+        ws = wb[sheet_name]
+        tables = _extract_openpyxl_table_refs(ws)
+        has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col = (
+            load_border_maps_xlsx(xlsx_path, sheet_name)
+        )
+        rects = _detect_border_rectangles(has_border, min_size=4)
+        merged_rects = _merge_rectangles(rects)
+        dedup: set[str] = set(tables)
+        for top_row, left_col, bottom_row, right_col in merged_rects:
+            top_row, left_col, bottom_row, right_col = shrink_to_content_openpyxl(
+                ws,
+                top_row,
+                left_col,
+                bottom_row,
+                right_col,
+                require_inside_border=False,
+                top_edge=top_edge,
+                bottom_edge=bottom_edge,
+                left_edge=left_edge,
+                right_edge=right_edge,
+                min_nonempty_ratio=0.0,
+            )
+            vals_block = _get_values_block(ws, top_row, left_col, bottom_row, right_col)
+            candidates = _collect_table_candidates_from_values(
+                _normalize_matrix(vals_block),
+                base_top=top_row,
+                base_left=left_col,
+                col_name=get_column_letter,
+            )
+            for addr in candidates:
+                if addr not in dedup:
+                    dedup.add(addr)
+                    tables.append(addr)
+        return tables
 def detect_tables(sheet: xw.Sheet) -> list[str]:
     excel_path: Path | None = None
     try:
@@ -1027,13 +1543,13 @@ def _coerce_numeric_preserve_format(val: str) -> int | float | str:
             return int(val)
         except Exception:
             return val
-    if _FLOAT_RE.match(val):
-        try:
-            dec = Decimal(val)
-            exponent = int(dec.as_tuple().exponent)
-            scale = max(1, -exponent)
-            quantized = dec.quantize(Decimal("1." + "0" * scale))
-            return float(quantized)
-        except (InvalidOperation, Exception):
-            return val
+    if _FLOAT_RE.match(val):
+        try:
+            dec = Decimal(val)
+            exponent = int(dec.as_tuple().exponent)
+            scale = max(1, -exponent)
+            quantized = dec.quantize(Decimal("1." + "0" * scale))
+            return float(quantized)
+        except (InvalidOperation, Exception):
+            return val
     return val

exstruct 0.2.80__py3-none-any.whl → 0.3.2__py3-none-any.whl

exstruct 0.2.80py3-none-any.whl → 0.3.2py3-none-any.whl