o2md 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- o2md/__init__.py +3 -0
- o2md/chart_utils.py +198 -0
- o2md/d2md.py +3404 -0
- o2md/d2md_charts.py +314 -0
- o2md/i18n.py +76 -0
- o2md/img2md.py +369 -0
- o2md/isolated_group_renderer.py +2791 -0
- o2md/jtd2md.py +801 -0
- o2md/jtd2md_table.py +496 -0
- o2md/locale/en/LC_MESSAGES/o2md.mo +0 -0
- o2md/locale/en/LC_MESSAGES/o2md.po +140 -0
- o2md/locale/ja/LC_MESSAGES/o2md.mo +0 -0
- o2md/locale/ja/LC_MESSAGES/o2md.po +140 -0
- o2md/o2md.py +756 -0
- o2md/omml_converter/__init__.py +12 -0
- o2md/omml_converter/latex_dict.py +286 -0
- o2md/omml_converter/omml.py +375 -0
- o2md/omml_converter/pre_process.py +179 -0
- o2md/p2md.py +1315 -0
- o2md/pdf2md.py +2320 -0
- o2md/pdf2md_docling.py +365 -0
- o2md/pdf2md_figures.py +2527 -0
- o2md/pdf2md_ocr.py +1121 -0
- o2md/pdf2md_tables.py +734 -0
- o2md/pdf2md_text.py +2348 -0
- o2md/utils.py +283 -0
- o2md/x2md.py +2973 -0
- o2md/x2md_charts.py +353 -0
- o2md/x2md_graphics.py +3771 -0
- o2md/x2md_tables.py +4916 -0
- o2md-0.1.0.dist-info/METADATA +521 -0
- o2md-0.1.0.dist-info/RECORD +35 -0
- o2md-0.1.0.dist-info/WHEEL +4 -0
- o2md-0.1.0.dist-info/entry_points.txt +8 -0
- o2md-0.1.0.dist-info/licenses/LICENSE +21 -0
o2md/__init__.py
ADDED
o2md/chart_utils.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
チャートデータ抽出ユーティリティ
|
|
4
|
+
|
|
5
|
+
Excel/Wordのチャートからデータを抽出し、Markdownテーブルに変換するための
|
|
6
|
+
共通モジュール。
|
|
7
|
+
|
|
8
|
+
対応チャートタイプ:
|
|
9
|
+
- 棒グラフ (bar)
|
|
10
|
+
- 折れ線グラフ (line)
|
|
11
|
+
- 円グラフ (pie)
|
|
12
|
+
- 散布図 (scatter)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import List, Optional, Union
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SeriesData:
    """Data for a single chart series.

    Attributes:
        name: Series name; the table builders in this module use it as the
            column header and fall back to ``Series<N>`` when it is empty.
        values: Data points of the series (numbers or text).
        x_values: X coordinates matched to ``values`` by position; used by
            the scatter table builder. ``None`` when the series carries no
            explicit X data.
    """

    name: str
    values: List[Union[float, int, str]]
    x_values: Optional[List[Union[float, int, str]]] = field(default=None)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ChartData:
    """Data for a whole chart.

    Attributes:
        chart_type: Chart kind. The Markdown converter in this module
            special-cases ``'scatter'``; every other value is rendered as a
            category table.
        title: Chart title, or ``None`` when the chart has none.
        categories: Category labels (one per table row), or ``None``.
        series: The chart's data series; a fresh empty list by default.
    """

    chart_type: str
    title: Optional[str]
    categories: Optional[List[str]]
    # default_factory gives every instance its own list (no shared mutable default).
    series: List[SeriesData] = field(default_factory=list)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def chart_data_to_markdown(chart_data: ChartData) -> str:
    """Render a ChartData object as a Markdown section.

    The section is a ``### Chart`` heading (followed by ``: <title>`` when a
    title is set), a blank line, and then either the data table or a
    placeholder line when the chart has no series.

    Args:
        chart_data: The chart data to convert.

    Returns:
        The Markdown text. It starts with a newline and ends with a newline.
    """
    heading = "\n### Chart"
    if chart_data.title:
        heading = f"{heading}: {chart_data.title}"

    # Nothing to tabulate: emit the placeholder instead of an empty table.
    if not chart_data.series:
        return "\n".join([heading, "", "[データなし]", ""])

    # Scatter charts are keyed by X value; every other type is keyed by category.
    if chart_data.chart_type == 'scatter':
        body = _build_scatter_table(chart_data)
    else:
        body = _build_standard_table(chart_data)

    return "\n".join([heading, "", body, ""])
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _build_standard_table(chart_data: ChartData) -> str:
    """Build the Markdown table for bar, line and pie charts.

    The table has a ``Category`` column followed by one column per series.
    A series without a name is labelled ``Series<N>`` (1-based position).

    When the chart has no categories, rows are labelled ``1..N`` where N is
    the length of the longest series, so no series is truncated. When
    categories are supplied, there is exactly one row per category.

    Args:
        chart_data: The chart data.

    Returns:
        The Markdown table text.
    """
    series_list = chart_data.series

    header = ["Category"] + [
        s.name if s.name else f"Series{i + 1}"
        for i, s in enumerate(series_list)
    ]

    # Treat a missing values list as empty so len()/indexing below are safe.
    columns = [s.values or [] for s in series_list]

    categories = chart_data.categories
    if not categories:
        # Size the fallback labels from the longest series rather than only
        # the first one; otherwise longer series would silently lose rows.
        longest = max((len(values) for values in columns), default=0)
        categories = [str(n) for n in range(1, longest + 1)]

    rows = []
    for idx, category in enumerate(categories):
        row = [str(category)]
        for values in columns:
            # A series shorter than the category list gets an empty cell.
            row.append(_format_value(values[idx]) if idx < len(values) else "")
        rows.append(row)

    return _build_markdown_table(header, rows)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _build_scatter_table(chart_data: ChartData) -> str:
    """Build the Markdown table for scatter charts.

    The table has an ``X`` column followed by one column per series. A
    series without a name is labelled ``Series<N>`` (1-based position).
    Rows are ordered by X value, with numeric X values before string ones.

    Each distinct X value normally yields one row. If a series contains the
    same X value several times, one row is emitted per occurrence so that no
    data point is dropped; series with fewer points at that X get empty
    cells in the extra rows.

    Args:
        chart_data: The chart data.

    Returns:
        The Markdown table text.
    """
    header = ["X"] + [
        s.name if s.name else f"Series{i + 1}"
        for i, s in enumerate(chart_data.series)
    ]

    # One lookup per series: X value -> Y values at that X, in source order.
    # Replaces repeated `x in list` / `list.index(x)` scans, which were both
    # quadratic and only ever found the first point at a given X.
    all_x = set()
    lookups = []
    for series in chart_data.series:
        points = {}
        if series.x_values:
            all_x.update(series.x_values)
            # zip() stops at the shorter list: an X without a matching value
            # still gets a row (it is in all_x) but with an empty cell.
            for x, y in zip(series.x_values, series.values):
                points.setdefault(x, []).append(y)
        lookups.append(points)

    # The (is_str, value) key keeps numbers and strings from being compared
    # with each other.
    x_values = sorted(all_x, key=lambda x: (isinstance(x, str), x))

    rows = []
    for x in x_values:
        per_series = [points.get(x, []) for points in lookups]
        # At least one row per X, more when some series repeats this X.
        depth = max(1, max((len(ys) for ys in per_series), default=0))
        for k in range(depth):
            row = [_format_value(x)]
            for ys in per_series:
                row.append(_format_value(ys[k]) if k < len(ys) else "")
            rows.append(row)

    return _build_markdown_table(header, rows)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _build_markdown_table(header: List[str], rows: List[List[str]]) -> str:
|
|
157
|
+
"""
|
|
158
|
+
ヘッダーと行データからMarkdownテーブルを構築する
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
header: ヘッダー行のリスト
|
|
162
|
+
rows: データ行のリスト
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
Markdownテーブル文字列
|
|
166
|
+
"""
|
|
167
|
+
lines = []
|
|
168
|
+
|
|
169
|
+
header_line = "| " + " | ".join(header) + " |"
|
|
170
|
+
lines.append(header_line)
|
|
171
|
+
|
|
172
|
+
separator = "|" + "|".join(["---"] * len(header)) + "|"
|
|
173
|
+
lines.append(separator)
|
|
174
|
+
|
|
175
|
+
for row in rows:
|
|
176
|
+
row_line = "| " + " | ".join(row) + " |"
|
|
177
|
+
lines.append(row_line)
|
|
178
|
+
|
|
179
|
+
return "\n".join(lines)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _format_value(val: Union[float, int, str, None]) -> str:
|
|
183
|
+
"""
|
|
184
|
+
値を表示用にフォーマットする
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
val: フォーマットする値
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
フォーマットされた文字列
|
|
191
|
+
"""
|
|
192
|
+
if val is None:
|
|
193
|
+
return ""
|
|
194
|
+
if isinstance(val, float):
|
|
195
|
+
if val == int(val):
|
|
196
|
+
return str(int(val))
|
|
197
|
+
return f"{val:.2f}"
|
|
198
|
+
return str(val)
|