ablechart 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ablechart/examples.py ADDED
@@ -0,0 +1,113 @@
1
+ """场景化最小 spec 示例库 — 给 LLM 的 few-shot 弹药。
2
+
3
+ 对能力较弱的模型,「按场景检索一个能跑的最小示例」远比读完整文档可靠。
4
+ 用法:把 ``chart_spec_examples()`` 的输出拼进提示词,或按场景取单条::
5
+
6
+ from ablechart import chart_spec_examples
7
+ prompt += chart_spec_examples("估值分位") # 单场景
8
+ prompt += chart_spec_examples() # 全部
9
+
10
+ 每条示例都保证:字段最少、可直接渲染、命中引擎的智能默认值。
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from typing import Optional
17
+
18
# Scenario catalogue. Each entry is a 3-tuple:
#   (scenario keywords, note, minimal spec)
# - keywords: "|"-separated aliases; ``chart_spec_examples`` substring-matches
#   a query against this string and uses the first alias as the heading.
# - note: one-line hint; it is searched as well and printed in the heading.
# - minimal spec: a dict that ``chart_spec_examples`` serialises with
#   ``json.dumps``.
# The English comment above each entry is only a gloss of its keywords.
SCENARIO_EXAMPLES = [
    # basic bar chart | ranking | comparison
    ("基础柱状图|排名|对比", "只传数据,类目/系列/类型全自动推断", {
        "title": "分公司营收",
        "data": {"分公司": ["华东", "华南", "华北"], "营收(亿)": [128, 96, 87]},
    }),
    # volume-price | dual axis | stock price and volume
    ("量价|双轴|股价成交量", "第二个系列指定 折线+右轴 即可,中文别名均可", {
        "title": "营收与净利率",
        "data": {"季度": ["24Q1", "24Q2", "24Q3"], "营收": [100, 110, 120], "净利率": [0.12, 0.13, 0.15]},
        "series": ["营收", {"column": "净利率", "type": "折线", "axis": "右轴"}],
        "layout": {"y2_axis": {"format": "percent"}},
    }),
    # NAV curve | trend | time series
    ("净值曲线|走势|时间序列", "日期列自动启用日期轴并抽稀标签;末点标注一个开关", {
        "chart": "line",
        "title": "组合净值走势",
        "data": "nav.csv",
        "legend": "none",
        "series": [{"column": "净值", "last_point_label": {"format": "0.000"}}],
    }),
    # revenue structure | stacked
    ("收入结构|堆叠", "stacked 一个开关,全部数值列自动堆叠", {
        "title": "业务线收入结构",
        "stacked": True,
        "data": {"年份": ["2023", "2024"], "企业年金": [55, 63], "职业年金": [33, 40]},
    }),
    # share evolution | 100% stacked | asset allocation
    ("占比演变|百分比堆叠|资产配置", "grouping 写 '100%' 即百分比堆叠", {
        "title": "资产配置演变",
        "grouping": "100%",
        "data": {"季度": ["24Q4", "25Q1"], "股票": [24, 26], "债券": [56, 54], "现金": [20, 20]},
    }),
    # growth attribution | contribution breakdown | GDP decomposition
    ("增长归因|贡献分解|GDP分解", "chart=contribution + total 指定合计列,其余自动", {
        "chart": "contribution",
        "title": "营收增速贡献分解",
        "total": "营收同比",
        "data": {"季度": ["24Q1", "24Q2"], "利息": [0.04, 0.03], "手续费": [0.01, 0.02], "营收同比": [0.05, 0.05]},
    }),
    # return attribution | waterfall | bridge chart
    ("收益归因|瀑布|桥图", "totals 列出合计类目即可,正负配色自动", {
        "chart": "瀑布图",
        "title": "年度收益归因(bp)",
        "data": {"项目": ["期初", "配置", "选股", "成本", "期末"], "贡献": [420, 85, 112, -38, 579]},
        "totals": ["期初", "期末"],
    }),
    # valuation percentile | historical range | PE range
    ("估值分位|历史区间|PE区间", "chart=range,low/high/average/current 四列", {
        "chart": "range",
        "title": "行业PE:当前 vs 十年区间",
        "data": {"行业": ["白酒", "银行"], "低": [18, 4], "高": [55, 9], "均值": [32, 6], "当前": [24, 5]},
        "low": "低", "high": "高", "average": "均值", "current": "当前",
    }),
    # performance ranking | horizontal bar chart
    ("业绩排名|横向条形图", "orientation=horizontal;labels 加数值标签;highlight 高亮基准", {
        "title": "近一年收益率排名",
        "orientation": "horizontal",
        "legend": "none",
        "data": {"产品": ["A", "基准", "B"], "收益": [0.17, 0.073, -0.038]},
        "series": [{"column": "收益", "labels": {"format": "0.0%"}}],
        "highlight": {"category": "基准"},
    }),
    # earnings forecast | consensus estimate | forecast period
    ("盈利预测|一致预期|预测期", "forecast_from 指定预测起点 → 斜纹+分隔线自动", {
        "title": "EPS增速与一致预期",
        "legend": "none",
        "data": {"年度": ["2024", "2025E", "2026E"], "EPS增速": [0.26, 0.21, 0.16]},
        "series": [{"column": "EPS增速", "labels": {"format": "0%"}}],
        "forecast_from": "2025E",
    }),
    # average line | target band | monitoring chart
    ("均值线|目标区间|监测图", "average 给 series 引擎自己算均值;band 可给分位数自动算", {
        "chart": "line",
        "title": "信用利差监测",
        "data": "spread.csv",
        "legend": "none",
        "series": [{"column": "利差", "last_point_label": True}],
        "annotations": [
            {"type": "average", "series": "利差", "label": "区间均值"},
            {"type": "band", "series": "利差", "quantiles": [0.25, 0.75], "label": "正常区间"},
        ],
    }),
    # risk-return | scatter | bubble
    ("风险收益|散点|气泡", "数值列按顺序自动当 x/y/size", {
        "chart": "bubble",
        "title": "基金风险收益分布",
        "data": {"波动率": [8.1, 9.2], "收益率": [10.5, 12.0], "规模": [50, 80]},
    }),
]
97
+
98
+
99
def chart_spec_examples(scenario: Optional[str] = None) -> str:
    """Return the minimal scenario spec examples rendered as markdown.

    ``scenario`` is an optional keyword. It is converted to ``str``, stripped
    and lower-cased, then substring-matched against the keyword string and the
    note of every entry in ``SCENARIO_EXAMPLES``. When no scenario is given,
    or when nothing matches, every example is rendered.

    Each example becomes a ``##`` heading (first keyword alias plus the note)
    followed by a fenced ``json`` block holding the spec.
    """
    selected = SCENARIO_EXAMPLES
    if scenario:
        needle = str(scenario).strip().lower()
        hits = [
            entry
            for entry in SCENARIO_EXAMPLES
            # entry[:2] is (keywords, note) — the two searchable text fields.
            if any(needle in text.lower() for text in entry[:2])
        ]
        # An empty result falls back to the full catalogue instead of
        # returning a document with no examples.
        if hits:
            selected = hits

    chunks = ["# Chart Spec 场景示例(最小可用)\n"]
    for keyword_group, description, minimal_spec in selected:
        heading = keyword_group.split("|")[0]
        rendered = json.dumps(minimal_spec, ensure_ascii=False, indent=2)
        chunks.extend((
            f"## {heading}({description})",
            "```json",
            rendered,
            "```\n",
        ))
    return "\n".join(chunks)
ablechart/inspect.py ADDED
@@ -0,0 +1,230 @@
1
+ """Chart inventory inspection.
2
+
3
+ Belongs to: **inspect** lifecycle per ADR-0007 §1.
4
+ Realises: ADR-0006 §1 (chart inventory).
5
+
6
+ Given an existing .pptx (possibly authored outside this engine, possibly
7
+ without engine metadata), return a structured technical inventory of every
8
+ chart shape — without touching data or layout. The inventory is what upper
9
+ layers (``pptfi``, ``ablemind``) use to build template manifests and decide
10
+ whether a chart is safely replaceable.
11
+
12
+ ADR constraints:
13
+
14
+ - Technical-layer only (no business slot, no ``user_id``, no prompt) — ADR-0007 §2
15
+ - Read-only (no XML mutation, no .pptx side effects)
16
+ - Unsupported charts get ``replaceable=False`` + warning, not exception
17
+ - Fail-loud only on missing/corrupt .pptx
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from dataclasses import dataclass, field
23
+ from typing import List, Optional, Tuple
24
+
25
+ from pptx import Presentation
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Public data model
30
+ # ---------------------------------------------------------------------------
31
+
32
+
33
@dataclass(frozen=True)
class ChartSelector:
    """Stable, immutable identifier for a chart inside a .pptx.

    Selector priority per ADR-0006 §1, most preferred first:

    1. ``explicit_name`` (shape name / business tag) — preferred when available
    2. ``shape_id`` + ``chart_part`` — stable across re-saves
    3. ``(slide_index, chart_index_on_slide)`` — fallback only

    ``slide_index``, ``shape_id`` and ``chart_part`` are always populated.
    ``explicit_name`` is populated only when the shape has a non-default name
    and is ``None`` otherwise.
    """

    # Position of the owning slide, as produced by enumerating the slides.
    slide_index: int
    # The shape's ``shape_id`` as reported by the shape object.
    shape_id: int
    # Chart part path without a leading "/".
    chart_part: str  # e.g. "ppt/charts/chart1.xml"
    # Shape name when it does not look auto-generated; otherwise None.
    explicit_name: Optional[str] = None
51
+
52
+
53
@dataclass(frozen=True)
class ChartInventoryItem:
    """One chart's technical facts. No business semantics (ADR-0007 §2).

    Attribute rebinding is blocked by ``frozen=True``; the list-valued fields
    themselves remain ordinary mutable lists.

    NOTE(review): ``frozen=True`` makes the dataclass generate ``__hash__``
    from all fields, so ``hash(item)`` raises ``TypeError`` because of the
    list fields — confirm items are never used as dict keys or set members.
    """

    # How to address this chart again later.
    selector: ChartSelector
    # Raw shape name, including auto-generated defaults.
    shape_name: Optional[str]
    # Zero-based position among the chart shapes of the same slide.
    chart_index_on_slide: int
    chart_type: str  # "bar", "line", "combo", "scatter", "bubble", "pie", "area", or "unknown"
    # Number of categories on the first plot; 0 when there are none.
    category_count: int
    series_count: int
    # Series names in the order the chart reports them.
    series_names: List[str] = field(default_factory=list)
    has_embedded_workbook: bool = False
    # Whether the chart qualifies for template-safe replacement.
    replaceable: bool = False
    warnings: List[str] = field(default_factory=list)  # e.g. "external_workbook_link"
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Internal helpers (technical layer — no business semantics)
71
+ # ---------------------------------------------------------------------------
72
+
73
# Chart types in the first supported batch of ADR-0006 §3. A chart whose
# classified type is missing from this set is never flagged as replaceable.
_REPLACEABLE_CHART_TYPES = frozenset((
    "area",
    "bar",
    "bubble",
    "combo",
    "line",
    "pie",
    "scatter",
))

# Name prefixes regarded as auto-generated defaults (note the trailing space).
# A shape name starting with none of them is treated as a user-provided
# business tag and surfaced as ``explicit_name``.
_DEFAULT_SHAPE_NAME_PREFIXES: Tuple[str, ...] = (
    "Chart ",
    "Placeholder ",
    "Picture ",
    "Group ",
    "TextBox ",
    "Title ",
    "Content Placeholder ",
    "Rectangle ",
    "Oval ",
    "Freeform ",
)
84
+
85
+
86
def _classify_chart_type(chart) -> str:
    """Derive the kernel ``chart_type`` string from a chart's plots.

    Every plot is classified by looking for marker substrings in its
    lower-cased class name; the first matching rule wins and a plot matching
    no rule is ``"unknown"``.

    - no plots → ``"unknown"``
    - all plots classify the same way → that classification (this holds for
      several plots of one kind too)
    - plots classify differently → ``"combo"``, including the case where one
      of the classifications is ``"unknown"``
    """
    # Ordered rules: (marker substrings, resulting type). Order matters
    # because the first rule with a matching marker decides.
    rules = (
        (("bar", "column"), "bar"),
        (("line",), "line"),
        (("area",), "area"),
        (("pie", "doughnut"), "pie"),
        (("scatter", "xy"), "scatter"),
        (("bubble",), "bubble"),
    )

    kinds: List[str] = []
    for plot in chart.plots:
        class_name = plot.__class__.__name__.lower()
        kind = next(
            (
                label
                for markers, label in rules
                if any(marker in class_name for marker in markers)
            ),
            "unknown",
        )
        kinds.append(kind)

    distinct = set(kinds)
    if not distinct:
        return "unknown"
    return kinds[0] if len(distinct) == 1 else "combo"
116
+
117
+
118
def _extract_chart_part(chart) -> str:
    """Return the chart part path, e.g. ``'ppt/charts/chart1.xml'``.

    The part name is converted to ``str`` and every leading ``/`` is removed.
    """
    partname = str(chart.part.partname)
    return partname.lstrip("/")
121
+
122
+
123
def _has_embedded_workbook(chart) -> bool:
    """True iff the chart part references an embedded workbook package.

    A relationship qualifies when its type URI contains ``"package"``
    (compared case-insensitively) **and** it is not marked as external. The
    second condition is what separates an embedded workbook from a link to a
    file outside the .pptx: a package-typed relationship whose target lives
    outside the package must not count as embedded.

    Returns ``False`` when the part has no relationships at all.
    """
    return any(
        "package" in rel.reltype.lower()
        # Relationship objects that do not expose ``is_external`` are treated
        # as internal, so they keep being reported as embedded.
        and not getattr(rel, "is_external", False)
        for rel in chart.part.rels.values()
    )
130
+
131
+
132
def _get_category_count(chart) -> int:
    """Count the categories of the chart's first plot.

    Returns ``0`` when the chart has no plots, when the first plot reports
    ``None`` for its categories, or when reading the categories raises
    ``AttributeError``, ``KeyError`` or ``ValueError`` (e.g. chart types with
    no categorical axis such as pure scatter/bubble).
    """
    plots = chart.plots
    if not plots:
        return 0
    try:
        categories = plots[0].categories
        # Materialise before counting so any iterable is accepted.
        return 0 if categories is None else len(list(categories))
    except (AttributeError, KeyError, ValueError):
        return 0
144
+
145
+
146
def _get_series_info(chart) -> Tuple[int, List[str]]:
    """Return ``(series_count, series_names)`` for the chart.

    Names keep the order in which the chart yields its series; a chart with
    no series gives ``(0, [])``.
    """
    names = [entry.name for entry in chart.series]
    return len(names), names
150
+
151
+
152
def _detect_explicit_name(shape_name: Optional[str]) -> Optional[str]:
    """Return ``shape_name`` unless it is empty or looks auto-generated.

    A name counts as auto-generated when it starts with one of
    ``_DEFAULT_SHAPE_NAME_PREFIXES`` (for example ``'Chart 1'``); in that
    case, and for ``None`` / ``""``, the result is ``None``.
    """
    # str.startswith accepts the whole prefix tuple in a single call.
    looks_default = not shape_name or shape_name.startswith(
        _DEFAULT_SHAPE_NAME_PREFIXES
    )
    return None if looks_default else shape_name
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Public API
163
+ # ---------------------------------------------------------------------------
164
+
165
+
166
def inspect_pptx_charts(pptx_path: str) -> List[ChartInventoryItem]:
    """Scan a .pptx and return its chart inventory.

    Per ADR-0006 §1 each inventory item carries:

    - ``slide_index``
    - ``shape_id`` / ``shape_name``
    - ``chart_index_on_slide``
    - ``chart_part`` (e.g. ``"ppt/charts/chart1.xml"``)
    - ``has_embedded_workbook``
    - ``chart_type``
    - ``category_count``, ``series_count``, ``series_names``
    - ``replaceable`` — true only when the chart type is in the ADR-0006 §3
      first batch **and** an embedded workbook is present
    - ``warnings`` — ``"unsupported_chart_type"`` and/or
      ``"missing_embedded_workbook"``, in that order

    Items come back in natural traversal order, i.e. sorted by
    ``(slide_index, chart_index_on_slide)``. Only the shapes yielded directly
    by ``slide.shapes`` are examined.

    A presentation without slides or without charts yields an empty list,
    **not** an exception. Errors raised while opening ``pptx_path`` are not
    caught here.
    """
    presentation = Presentation(pptx_path)
    items: List[ChartInventoryItem] = []

    for slide_index, slide in enumerate(presentation.slides):
        # Enumerating only the chart-bearing shapes makes the index count
        # charts, not shapes.
        chart_shapes = (
            candidate
            for candidate in slide.shapes
            if getattr(candidate, "has_chart", False)
        )
        for chart_index_on_slide, shape in enumerate(chart_shapes):
            chart = shape.chart

            chart_type = _classify_chart_type(chart)
            category_count = _get_category_count(chart)
            series_count, series_names = _get_series_info(chart)
            embedded = _has_embedded_workbook(chart)

            supported = chart_type in _REPLACEABLE_CHART_TYPES
            warnings: List[str] = []
            if not supported:
                warnings.append("unsupported_chart_type")
            if not embedded:
                # No workbook → cannot do template-safe replace per ADR-0006 §3.
                warnings.append("missing_embedded_workbook")

            items.append(ChartInventoryItem(
                selector=ChartSelector(
                    slide_index=slide_index,
                    shape_id=shape.shape_id,
                    chart_part=_extract_chart_part(chart),
                    explicit_name=_detect_explicit_name(shape.name),
                ),
                shape_name=shape.name,
                chart_index_on_slide=chart_index_on_slide,
                chart_type=chart_type,
                category_count=category_count,
                series_count=series_count,
                series_names=series_names,
                has_embedded_workbook=embedded,
                replaceable=supported and embedded,
                warnings=warnings,
            ))

    return items