exstruct 0.2.80__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from openpyxl.utils import range_boundaries
6
+
7
+
8
@dataclass(frozen=True)
class RangeBounds:
    """Immutable rectangle of zero-based cell indices for a worksheet range.

    Instances are frozen, so they compare by value and can be used as
    dictionary keys or set members.

    Attributes:
        r1: Zero-based index of the top row.
        c1: Zero-based index of the left column.
        r2: Zero-based index of the bottom row.
        c2: Zero-based index of the right column.
    """

    r1: int
    c1: int
    r2: int
    c2: int
23
+
24
+
25
def parse_range_zero_based(range_str: str) -> RangeBounds | None:
    """Parse an Excel range string into zero-based bounds.

    An optional sheet qualifier (everything up to the last ``!``) is dropped
    before the cell part is handed to openpyxl's ``range_boundaries``.

    Args:
        range_str: Excel range string (e.g., "Sheet1!A1:B2").

    Returns:
        RangeBounds in zero-based coordinates, or None when the string is
        empty, cannot be parsed, or does not define all four bounds.
    """
    cleaned = range_str.strip()
    if not cleaned:
        return None
    # Split on the LAST "!": the cell part never contains one, but a quoted
    # sheet name may (e.g. "'Q1!'!A1:B2").
    cleaned = cleaned.rsplit("!", 1)[-1]
    try:
        min_col, min_row, max_col, max_row = range_boundaries(cleaned)
    except Exception:
        return None
    # openpyxl may report an omitted bound as None (whole-column/row
    # references such as "A:C"); treat that as a parse failure rather than
    # letting the arithmetic below raise TypeError.
    if None in (min_col, min_row, max_col, max_row):
        return None
    return RangeBounds(
        r1=min_row - 1,
        c1=min_col - 1,
        r2=max_row - 1,
        c2=max_col - 1,
    )
exstruct/core/shapes.py CHANGED
@@ -1,32 +1,64 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Iterator
3
+ from collections.abc import Iterable, Iterator
4
4
  import math
5
- from typing import SupportsInt, cast
5
+ from typing import Literal, Protocol, SupportsInt, cast, runtime_checkable
6
6
 
7
7
  import xlwings as xw
8
8
  from xlwings import Book
9
9
 
10
- from ..models import Shape
10
+ from ..models import Arrow, Shape, SmartArt, SmartArtNode
11
11
  from ..models.maps import MSO_AUTO_SHAPE_TYPE_MAP, MSO_SHAPE_TYPE_MAP
12
12
 
13
13
 
14
14
def compute_line_angle_deg(w: float, h: float) -> float:
    """
    Return the direction of the vector ``(w, h)`` as an angle in degrees.

    The angle is ``atan2(h, w)`` converted to degrees and normalized with
    modulo 360. With Excel's downward-growing vertical axis this reads as a
    clockwise angle starting at East.

    Parameters:
        w (float): Horizontal delta (positive to the right).
        h (float): Vertical delta (positive downward).

    Returns:
        float: Angle in degrees (e.g., 0 = East, 90 = South in Excel coordinates).
    """
    theta = math.atan2(h, w)
    return math.degrees(theta) % 360.0
17
26
 
18
27
 
19
def angle_to_compass(
    angle: float,
) -> Literal["E", "SE", "S", "SW", "W", "NW", "N", "NE"]:
    """
    Map an angle in degrees to one of eight compass directions.

    The angle is interpreted with 0 degrees at East and increasing values
    rotating counterclockwise (45 -> NE, 90 -> N). Each direction owns a
    45-degree sector centred on it; angles outside 0..360 are wrapped.

    NOTE(review): compute_line_angle_deg documents a clockwise convention
    (90 = South); confirm callers account for the difference.

    Parameters:
        angle (float): Angle in degrees.

    Returns:
        str: One of `"E"`, `"SE"`, `"S"`, `"SW"`, `"W"`, `"NW"`, `"N"`, or `"NE"`
        corresponding to the nearest 8-point compass direction.
    """
    dirs = ("E", "NE", "N", "NW", "W", "SW", "S", "SE")
    # Float modulo can round up to exactly 360.0 when the shifted angle is a
    # tiny negative number, which would give sector 8; wrap it back to 0.
    idx = int(((angle + 22.5) % 360) // 45) % len(dirs)
    return cast(Literal["E", "SE", "S", "SW", "W", "NW", "N", "NE"], dirs[idx])
24
45
 
25
46
 
26
47
  def coord_to_cell_by_edges(
27
48
  row_edges: list[float], col_edges: list[float], x: float, y: float
28
49
  ) -> str | None:
29
- """Estimate cell address from coordinates and cumulative edges; return None if out of range."""
50
+ """
51
+ Estimate the Excel A1-style cell that contains a point given cumulative row and column edge coordinates.
52
+
53
+ Parameters:
54
+ row_edges (list[float]): Monotonic list of cumulative vertical edges (top-to-bottom). Consecutive entries define row spans.
55
+ col_edges (list[float]): Monotonic list of cumulative horizontal edges (left-to-right). Consecutive entries define column spans.
56
+ x (float): Horizontal coordinate (same coordinate system as col_edges).
57
+ y (float): Vertical coordinate (same coordinate system as row_edges).
58
+
59
+ Returns:
60
+ str | None: A1-style cell address (e.g., "B3") if the point falls inside the grid; `None` if the point is outside the provided edge ranges. Intervals are treated as left-inclusive and right-exclusive: [edge_i, edge_{i+1}).
61
+ """
30
62
 
31
63
  def find_index(edges: list[float], pos: float) -> int | None:
32
64
  for i in range(1, len(edges)):
@@ -80,10 +112,18 @@ def _should_include_shape(
80
112
  output_mode: str = "standard",
81
113
  ) -> bool:
82
114
  """
83
- Decide whether to emit a shape given output mode.
84
- - standard: emit if text exists OR the shape is an arrow/line/connector.
85
- - light: suppress shapes entirely (handled upstream, but guard defensively).
86
- - verbose: include all (except already-filtered chart/comment/picture/form controls).
115
+ Determine whether a shape should be included in the output based on its properties and the selected output mode.
116
+
117
+ Modes:
118
+ - "light": always exclude shapes.
119
+ - "standard": include when the shape has text or represents a relationship (line/connector).
120
+ - "verbose": include all shapes (other global exclusions are handled elsewhere).
121
+
122
+ Parameters:
123
+ output_mode (str): One of "light", "standard", or "verbose"; controls inclusion rules.
124
+
125
+ Returns:
126
+ bool: `True` if the shape should be emitted, `False` otherwise.
87
127
  """
88
128
  if output_mode == "light":
89
129
  return False
@@ -108,16 +148,179 @@ def _should_include_shape(
108
148
  return True
109
149
 
110
150
 
151
+ @runtime_checkable
152
+ class _TextRangeLike(Protocol):
153
+ """Text range interface for SmartArt nodes."""
154
+
155
+ Text: str | None
156
+
157
+
158
+ @runtime_checkable
159
+ class _TextFrameLike(Protocol):
160
+ """Text frame interface for SmartArt nodes."""
161
+
162
+ HasText: bool
163
+ TextRange: _TextRangeLike
164
+
165
+
166
+ @runtime_checkable
167
+ class _SmartArtNodeLike(Protocol):
168
+ """SmartArt node interface."""
169
+
170
+ Level: int
171
+ TextFrame2: _TextFrameLike
172
+
173
+
174
+ @runtime_checkable
175
+ class _SmartArtLike(Protocol):
176
+ """SmartArt interface."""
177
+
178
+ Layout: object
179
+ AllNodes: Iterable[_SmartArtNodeLike]
180
+
181
+
182
+ def _shape_has_smartart(shp: xw.Shape) -> bool:
183
+ """
184
+ Determine whether a shape exposes SmartArt content.
185
+
186
+ Returns:
187
+ bool: `True` if the shape exposes SmartArt (i.e., has an accessible `HasSmartArt` attribute), `False` otherwise.
188
+ """
189
+ try:
190
+ api = shp.api
191
+ except Exception:
192
+ return False
193
+ try:
194
+ return bool(api.HasSmartArt)
195
+ except Exception:
196
+ return False
197
+
198
+
199
+ def _get_smartart_layout_name(smartart: _SmartArtLike | None) -> str:
200
+ """
201
+ Get the SmartArt layout name or "Unknown" if it cannot be determined.
202
+
203
+ Returns:
204
+ layout_name (str): The layout name from `smartart.Layout.Name`, or "Unknown" when `smartart` is None or the name cannot be retrieved.
205
+ """
206
+ if smartart is None:
207
+ return "Unknown"
208
+ try:
209
+ layout = getattr(smartart, "Layout", None)
210
+ name = getattr(layout, "Name", None)
211
+ return str(name) if name is not None else "Unknown"
212
+ except Exception:
213
+ return "Unknown"
214
+
215
+
216
+ def _collect_smartart_node_info(
217
+ smartart: _SmartArtLike | None,
218
+ ) -> list[tuple[int, str]]:
219
+ """
220
+ Extract a list of (level, text) tuples for each node present in the given SmartArt.
221
+
222
+ Parameters:
223
+ smartart (_SmartArtLike | None): A SmartArt-like COM object or `None`. If `None` or inaccessible, no nodes are collected.
224
+
225
+ Returns:
226
+ list[tuple[int, str]]: A list of tuples where each tuple is (node level, node text). Returns an empty list if the SmartArt is `None`, inaccessible, or if nodes lack a numeric level.
227
+ """
228
+ nodes_info: list[tuple[int, str]] = []
229
+ if smartart is None:
230
+ return nodes_info
231
+ try:
232
+ all_nodes = smartart.AllNodes
233
+ except Exception:
234
+ return nodes_info
235
+
236
+ for node in all_nodes:
237
+ level = _get_smartart_node_level(node)
238
+ if level is None:
239
+ continue
240
+ text = ""
241
+ try:
242
+ text_frame = node.TextFrame2
243
+ if text_frame.HasText:
244
+ text_value = text_frame.TextRange.Text
245
+ text = str(text_value) if text_value is not None else ""
246
+ except Exception:
247
+ text = ""
248
+ nodes_info.append((level, text))
249
+ return nodes_info
250
+
251
+
252
+ def _get_smartart_node_level(node: _SmartArtNodeLike) -> int | None:
253
+ """
254
+ Get the numerical level of a SmartArt node.
255
+
256
+ Returns:
257
+ int | None: The node's level as an integer, or `None` if the level is missing or cannot be converted to an integer.
258
+ """
259
+ try:
260
+ return int(node.Level)
261
+ except Exception:
262
+ return None
263
+
264
+
265
+ def _build_smartart_tree(nodes_info: list[tuple[int, str]]) -> list[SmartArtNode]:
266
+ """
267
+ Build a nested tree of SmartArtNode objects from a flat list of (level, text) tuples.
268
+
269
+ Parameters:
270
+ nodes_info (list[tuple[int, str]]): Ordered tuples where each tuple is (level, text);
271
+ `level` is the hierarchical depth (integer) and `text` is the node label.
272
+
273
+ Returns:
274
+ roots (list[SmartArtNode]): Top-level SmartArtNode instances whose `kids` lists
275
+ contain their nested child nodes according to the provided levels.
276
+ """
277
+ roots: list[SmartArtNode] = []
278
+ stack: list[tuple[int, SmartArtNode]] = []
279
+ for level, text in nodes_info:
280
+ node = SmartArtNode(text=text, kids=[])
281
+ while stack and stack[-1][0] >= level:
282
+ stack.pop()
283
+ if stack:
284
+ stack[-1][1].kids.append(node)
285
+ else:
286
+ roots.append(node)
287
+ stack.append((level, node))
288
+ return roots
289
+
290
+
291
def _extract_smartart_nodes(smartart: _SmartArtLike | None) -> list[SmartArtNode]:
    """
    Build the SmartArtNode tree for a SmartArt object.

    Collects the flat (level, text) node list and nests it by level.

    Parameters:
        smartart (_SmartArtLike | None): SmartArt-like object to read, or `None`.

    Returns:
        list[SmartArtNode]: Root nodes of the resulting hierarchy; empty when no nodes could be collected.
    """
    return _build_smartart_tree(_collect_smartart_node_info(smartart))
303
+
304
+
111
305
  def get_shapes_with_position( # noqa: C901
112
306
  workbook: Book, mode: str = "standard"
113
- ) -> dict[str, list[Shape]]:
114
- """Scan shapes in a workbook and return per-sheet Shape lists with position info."""
115
- shape_data: dict[str, list[Shape]] = {}
307
+ ) -> dict[str, list[Shape | Arrow | SmartArt]]:
308
+ """
309
+ Scan all shapes in each worksheet and collect their positional and metadata information.
310
+
311
+ Parameters:
312
+ workbook (Book): The xlwings workbook to scan.
313
+ mode (str): Output detail level; "light" skips most shapes, "standard" includes shapes with text or relationships, and "verbose" includes full size/rotation details.
314
+
315
+ Returns:
316
+ dict[str, list[Shape | Arrow | SmartArt]]: Mapping of sheet name to a list of collected shape objects (Shape, Arrow, or SmartArt) containing position (left/top), optional size (width/height), textual content, and other captured metadata (ids, directions, connections, layout/nodes for SmartArt).
317
+ """
318
+ shape_data: dict[str, list[Shape | Arrow | SmartArt]] = {}
116
319
  for sheet in workbook.sheets:
117
- shapes: list[Shape] = []
320
+ shapes: list[Shape | Arrow | SmartArt] = []
118
321
  excel_names: list[tuple[str, int]] = []
119
322
  node_index = 0
120
- pending_connections: list[tuple[Shape, str | None, str | None]] = []
323
+ pending_connections: list[tuple[Arrow, str | None, str | None]] = []
121
324
  for root in sheet.shapes:
122
325
  for shp in iter_shapes_recursive(root):
123
326
  try:
@@ -148,7 +351,11 @@ def get_shapes_with_position( # noqa: C901
148
351
  except Exception:
149
352
  text = ""
150
353
 
151
- if not _should_include_shape(
354
+ if mode == "light":
355
+ continue
356
+
357
+ has_smartart = _shape_has_smartart(shp)
358
+ if not has_smartart and not _should_include_shape(
152
359
  text=text,
153
360
  shape_type_num=type_num,
154
361
  shape_type_str=shape_type_str,
@@ -179,7 +386,8 @@ def get_shapes_with_position( # noqa: C901
179
386
  ):
180
387
  is_relationship_geom = True
181
388
  if shape_type_str and (
182
- "Connector" in shape_type_str or shape_type_str in ("Line", "ConnectLine")
389
+ "Connector" in shape_type_str
390
+ or shape_type_str in ("Line", "ConnectLine")
183
391
  ):
184
392
  is_relationship_geom = True
185
393
  if shape_name and ("Connector" in shape_name or "Line" in shape_name):
@@ -192,19 +400,54 @@ def get_shapes_with_position( # noqa: C901
192
400
 
193
401
  excel_name = shape_name if isinstance(shape_name, str) else None
194
402
 
195
- shape_obj = Shape(
196
- id=shape_id,
197
- text=text,
198
- l=int(shp.left),
199
- t=int(shp.top),
200
- w=int(shp.width)
201
- if mode == "verbose" or shape_type_str == "Group"
202
- else None,
203
- h=int(shp.height)
204
- if mode == "verbose" or shape_type_str == "Group"
205
- else None,
206
- type=type_label,
207
- )
403
+ shape_obj: Shape | Arrow | SmartArt
404
+ if has_smartart:
405
+ smartart_obj: _SmartArtLike | None = None
406
+ try:
407
+ smartart_obj = shp.api.SmartArt
408
+ except Exception:
409
+ smartart_obj = None
410
+ shape_obj = SmartArt(
411
+ id=shape_id,
412
+ text=text,
413
+ l=int(shp.left),
414
+ t=int(shp.top),
415
+ w=int(shp.width)
416
+ if mode == "verbose" or shape_type_str == "Group"
417
+ else None,
418
+ h=int(shp.height)
419
+ if mode == "verbose" or shape_type_str == "Group"
420
+ else None,
421
+ layout=_get_smartart_layout_name(smartart_obj),
422
+ nodes=_extract_smartart_nodes(smartart_obj),
423
+ )
424
+ elif is_relationship_geom:
425
+ shape_obj = Arrow(
426
+ id=shape_id,
427
+ text=text,
428
+ l=int(shp.left),
429
+ t=int(shp.top),
430
+ w=int(shp.width)
431
+ if mode == "verbose" or shape_type_str == "Group"
432
+ else None,
433
+ h=int(shp.height)
434
+ if mode == "verbose" or shape_type_str == "Group"
435
+ else None,
436
+ )
437
+ else:
438
+ shape_obj = Shape(
439
+ id=shape_id,
440
+ text=text,
441
+ l=int(shp.left),
442
+ t=int(shp.top),
443
+ w=int(shp.width)
444
+ if mode == "verbose" or shape_type_str == "Group"
445
+ else None,
446
+ h=int(shp.height)
447
+ if mode == "verbose" or shape_type_str == "Group"
448
+ else None,
449
+ type=type_label,
450
+ )
208
451
  if excel_name:
209
452
  if shape_id is not None:
210
453
  excel_names.append((excel_name, shape_id))
@@ -215,7 +458,8 @@ def get_shapes_with_position( # noqa: C901
215
458
  angle = compute_line_angle_deg(
216
459
  float(shp.width), float(shp.height)
217
460
  )
218
- shape_obj.direction = angle_to_compass(angle) # type: ignore
461
+ if isinstance(shape_obj, Arrow):
462
+ shape_obj.direction = angle_to_compass(angle)
219
463
  try:
220
464
  rot = float(shp.api.Rotation)
221
465
  if abs(rot) > 1e-6:
@@ -225,8 +469,9 @@ def get_shapes_with_position( # noqa: C901
225
469
  try:
226
470
  begin_style = int(shp.api.Line.BeginArrowheadStyle)
227
471
  end_style = int(shp.api.Line.EndArrowheadStyle)
228
- shape_obj.begin_arrow_style = begin_style
229
- shape_obj.end_arrow_style = end_style
472
+ if isinstance(shape_obj, Arrow):
473
+ shape_obj.begin_arrow_style = begin_style
474
+ shape_obj.end_arrow_style = end_style
230
475
  except Exception:
231
476
  pass
232
477
  # Connector begin/end connected shapes (if this shape is a connector).
@@ -262,7 +507,8 @@ def get_shapes_with_position( # noqa: C901
262
507
  pass
263
508
  except Exception:
264
509
  pass
265
- pending_connections.append((shape_obj, begin_name, end_name))
510
+ if isinstance(shape_obj, Arrow):
511
+ pending_connections.append((shape_obj, begin_name, end_name))
266
512
  shapes.append(shape_obj)
267
513
  if pending_connections:
268
514
  name_to_id = {name: sid for name, sid in excel_names}
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from contextlib import contextmanager
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Any
8
+ import warnings
9
+
10
+ from openpyxl import load_workbook
11
+ import xlwings as xw
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@contextmanager
def openpyxl_workbook(
    file_path: Path, *, data_only: bool, read_only: bool
) -> Iterator[Any]:
    """Open a workbook with openpyxl and close it when the context exits.

    Three known openpyxl ``UserWarning`` messages are silenced while the
    workbook is loaded and used. A failure to close is logged at debug level
    and not raised.

    Args:
        file_path: Workbook path.
        data_only: Whether to read formula results.
        read_only: Whether to open in read-only mode.

    Yields:
        openpyxl workbook instance.
    """
    silenced_messages = (
        "Unknown extension is not supported and will be removed",
        "Conditional Formatting extension is not supported and will be removed",
        "Cannot parse header or footer so it will be ignored",
    )
    with warnings.catch_warnings():
        for pattern in silenced_messages:
            warnings.filterwarnings(
                "ignore",
                message=pattern,
                category=UserWarning,
                module="openpyxl",
            )
        book = load_workbook(file_path, data_only=data_only, read_only=read_only)
        try:
            yield book
        finally:
            try:
                book.close()
            except Exception as exc:
                logger.debug("Failed to close openpyxl workbook. (%r)", exc)
57
+
58
+
59
@contextmanager
def xlwings_workbook(file_path: Path, *, visible: bool = False) -> Iterator[xw.Book]:
    """Open an Excel workbook via xlwings and close if created.

    If the workbook is already open in a running Excel instance it is yielded
    as-is and left open afterwards. Otherwise a new Excel application is
    started, and both the workbook and the application are shut down on exit.
    Failures during that cleanup are logged at debug level and not raised.

    Args:
        file_path: Workbook path.
        visible: Whether to show the Excel application window.

    Yields:
        xlwings workbook instance.

    Raises:
        Exception: Whatever xlwings raises when the application cannot be
            started or the workbook cannot be opened.
    """
    existing = _find_open_workbook(file_path)
    if existing:
        yield existing
        return

    app = xw.App(add_book=False, visible=visible)
    wb: xw.Book | None = None
    try:
        # Open inside the try so a failed open still quits the Excel
        # application started above instead of leaking the process.
        wb = app.books.open(str(file_path))
        yield wb
    finally:
        if wb is not None:
            try:
                wb.close()
            except Exception as exc:
                logger.debug("Failed to close Excel workbook. (%r)", exc)
        try:
            app.quit()
        except Exception as exc:
            logger.debug("Failed to quit Excel application. (%r)", exc)
88
+
89
+
90
def _find_open_workbook(file_path: Path) -> xw.Book | None:
    """Return an existing workbook if already open in Excel.

    Compares the resolved path of every workbook in every running Excel
    application against the resolved ``file_path``. Workbooks whose path
    cannot be resolved are skipped; any other failure while inspecting Excel
    is logged at debug level and reported as "not found".

    Args:
        file_path: Workbook path to search for.

    Returns:
        Existing xlwings workbook if open; otherwise None.
    """
    try:
        # Resolve the target once; it does not change between workbooks.
        target = file_path.resolve()
        for app in xw.apps:
            for wb in app.books:
                try:
                    candidate = Path(wb.fullname).resolve()
                except Exception as exc:
                    logger.debug("Failed to resolve workbook path. (%r)", exc)
                    continue
                if candidate == target:
                    return wb
    except Exception as exc:
        logger.debug("Failed to inspect open Excel workbooks. (%r)", exc)
    return None