siat 3.10.132__py3-none-any.whl → 3.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/__init__.py +0 -0
- siat/allin.py +8 -0
- siat/assets_liquidity.py +0 -0
- siat/beta_adjustment.py +0 -0
- siat/beta_adjustment_china.py +0 -0
- siat/blockchain.py +0 -0
- siat/bond.py +0 -0
- siat/bond_base.py +0 -0
- siat/bond_china.py +0 -0
- siat/bond_zh_sina.py +0 -0
- siat/capm_beta.py +0 -0
- siat/capm_beta2.py +4 -4
- siat/common.py +9 -6
- siat/compare_cross.py +0 -0
- siat/copyrights.py +0 -0
- siat/cryptocurrency.py +0 -0
- siat/economy.py +0 -0
- siat/economy2.py +0 -0
- siat/esg.py +0 -0
- siat/event_study.py +0 -0
- siat/exchange_bond_china.pickle +0 -0
- siat/fama_french.py +0 -0
- siat/fin_stmt2_yahoo.py +0 -0
- siat/financial_base.py +0 -0
- siat/financial_statements.py +0 -0
- siat/financials.py +0 -0
- siat/financials2.py +0 -0
- siat/financials_china.py +0 -0
- siat/financials_china2.py +0 -0
- siat/fund.py +0 -0
- siat/fund_china.pickle +0 -0
- siat/fund_china.py +0 -0
- siat/future_china.py +0 -0
- siat/google_authenticator.py +0 -0
- siat/grafix.py +55 -4
- siat/holding_risk.py +0 -0
- siat/luchy_draw.py +0 -0
- siat/market_china.py +0 -0
- siat/markowitz.py +0 -0
- siat/markowitz2.py +1 -0
- siat/markowitz2_20250704.py +0 -0
- siat/markowitz2_20250705.py +0 -0
- siat/markowitz_simple.py +0 -0
- siat/ml_cases.py +0 -0
- siat/ml_cases_example.py +0 -0
- siat/option_china.py +0 -0
- siat/option_pricing.py +0 -0
- siat/other_indexes.py +0 -0
- siat/risk_adjusted_return.py +0 -0
- siat/risk_adjusted_return2.py +8 -4
- siat/risk_evaluation.py +0 -0
- siat/risk_free_rate.py +0 -0
- siat/save2docx.py +345 -0
- siat/save2pdf.py +145 -0
- siat/sector_china.py +0 -0
- siat/security_price2.py +0 -0
- siat/security_prices.py +168 -6
- siat/security_trend.py +0 -0
- siat/security_trend2.py +2 -2
- siat/stock.py +11 -1
- siat/stock_advice_linear.py +0 -0
- siat/stock_base.py +0 -0
- siat/stock_china.py +0 -0
- siat/stock_info.pickle +0 -0
- siat/stock_prices_kneighbors.py +0 -0
- siat/stock_prices_linear.py +0 -0
- siat/stock_profile.py +0 -0
- siat/stock_technical.py +0 -0
- siat/stooq.py +0 -0
- siat/transaction.py +0 -0
- siat/translate.py +0 -0
- siat/valuation.py +0 -0
- siat/valuation_china.py +0 -0
- siat/var_model_validation.py +0 -0
- siat/yf_name.py +0 -0
- {siat-3.10.132.dist-info/licenses → siat-3.11.1.dist-info}/LICENSE +0 -0
- {siat-3.10.132.dist-info → siat-3.11.1.dist-info}/METADATA +234 -235
- siat-3.11.1.dist-info/RECORD +80 -0
- {siat-3.10.132.dist-info → siat-3.11.1.dist-info}/WHEEL +1 -1
- {siat-3.10.132.dist-info → siat-3.11.1.dist-info}/top_level.txt +0 -1
- build/lib/build/lib/siat/__init__.py +0 -75
- build/lib/build/lib/siat/allin.py +0 -137
- build/lib/build/lib/siat/assets_liquidity.py +0 -915
- build/lib/build/lib/siat/beta_adjustment.py +0 -1058
- build/lib/build/lib/siat/beta_adjustment_china.py +0 -548
- build/lib/build/lib/siat/blockchain.py +0 -143
- build/lib/build/lib/siat/bond.py +0 -2900
- build/lib/build/lib/siat/bond_base.py +0 -992
- build/lib/build/lib/siat/bond_china.py +0 -100
- build/lib/build/lib/siat/bond_zh_sina.py +0 -143
- build/lib/build/lib/siat/capm_beta.py +0 -783
- build/lib/build/lib/siat/capm_beta2.py +0 -887
- build/lib/build/lib/siat/common.py +0 -5360
- build/lib/build/lib/siat/compare_cross.py +0 -642
- build/lib/build/lib/siat/copyrights.py +0 -18
- build/lib/build/lib/siat/cryptocurrency.py +0 -667
- build/lib/build/lib/siat/economy.py +0 -1471
- build/lib/build/lib/siat/economy2.py +0 -1853
- build/lib/build/lib/siat/esg.py +0 -536
- build/lib/build/lib/siat/event_study.py +0 -815
- build/lib/build/lib/siat/fama_french.py +0 -1521
- build/lib/build/lib/siat/fin_stmt2_yahoo.py +0 -982
- build/lib/build/lib/siat/financial_base.py +0 -1160
- build/lib/build/lib/siat/financial_statements.py +0 -598
- build/lib/build/lib/siat/financials.py +0 -2339
- build/lib/build/lib/siat/financials2.py +0 -1278
- build/lib/build/lib/siat/financials_china.py +0 -4433
- build/lib/build/lib/siat/financials_china2.py +0 -2212
- build/lib/build/lib/siat/fund.py +0 -629
- build/lib/build/lib/siat/fund_china.py +0 -3307
- build/lib/build/lib/siat/future_china.py +0 -551
- build/lib/build/lib/siat/google_authenticator.py +0 -47
- build/lib/build/lib/siat/grafix.py +0 -3636
- build/lib/build/lib/siat/holding_risk.py +0 -867
- build/lib/build/lib/siat/luchy_draw.py +0 -638
- build/lib/build/lib/siat/market_china.py +0 -1168
- build/lib/build/lib/siat/markowitz.py +0 -2363
- build/lib/build/lib/siat/markowitz2.py +0 -3150
- build/lib/build/lib/siat/markowitz2_20250704.py +0 -2969
- build/lib/build/lib/siat/markowitz2_20250705.py +0 -3158
- build/lib/build/lib/siat/markowitz_simple.py +0 -373
- build/lib/build/lib/siat/ml_cases.py +0 -2291
- build/lib/build/lib/siat/ml_cases_example.py +0 -60
- build/lib/build/lib/siat/option_china.py +0 -3069
- build/lib/build/lib/siat/option_pricing.py +0 -1925
- build/lib/build/lib/siat/other_indexes.py +0 -409
- build/lib/build/lib/siat/risk_adjusted_return.py +0 -1576
- build/lib/build/lib/siat/risk_adjusted_return2.py +0 -1900
- build/lib/build/lib/siat/risk_evaluation.py +0 -2218
- build/lib/build/lib/siat/risk_free_rate.py +0 -351
- build/lib/build/lib/siat/sector_china.py +0 -4140
- build/lib/build/lib/siat/security_price2.py +0 -727
- build/lib/build/lib/siat/security_prices.py +0 -3408
- build/lib/build/lib/siat/security_trend.py +0 -402
- build/lib/build/lib/siat/security_trend2.py +0 -646
- build/lib/build/lib/siat/stock.py +0 -4284
- build/lib/build/lib/siat/stock_advice_linear.py +0 -934
- build/lib/build/lib/siat/stock_base.py +0 -26
- build/lib/build/lib/siat/stock_china.py +0 -2095
- build/lib/build/lib/siat/stock_prices_kneighbors.py +0 -910
- build/lib/build/lib/siat/stock_prices_linear.py +0 -386
- build/lib/build/lib/siat/stock_profile.py +0 -707
- build/lib/build/lib/siat/stock_technical.py +0 -3305
- build/lib/build/lib/siat/stooq.py +0 -74
- build/lib/build/lib/siat/transaction.py +0 -347
- build/lib/build/lib/siat/translate.py +0 -5183
- build/lib/build/lib/siat/valuation.py +0 -1378
- build/lib/build/lib/siat/valuation_china.py +0 -2076
- build/lib/build/lib/siat/var_model_validation.py +0 -444
- build/lib/build/lib/siat/yf_name.py +0 -811
- build/lib/siat/__init__.py +0 -75
- build/lib/siat/allin.py +0 -137
- build/lib/siat/assets_liquidity.py +0 -915
- build/lib/siat/beta_adjustment.py +0 -1058
- build/lib/siat/beta_adjustment_china.py +0 -548
- build/lib/siat/blockchain.py +0 -143
- build/lib/siat/bond.py +0 -2900
- build/lib/siat/bond_base.py +0 -992
- build/lib/siat/bond_china.py +0 -100
- build/lib/siat/bond_zh_sina.py +0 -143
- build/lib/siat/capm_beta.py +0 -783
- build/lib/siat/capm_beta2.py +0 -887
- build/lib/siat/common.py +0 -5360
- build/lib/siat/compare_cross.py +0 -642
- build/lib/siat/copyrights.py +0 -18
- build/lib/siat/cryptocurrency.py +0 -667
- build/lib/siat/economy.py +0 -1471
- build/lib/siat/economy2.py +0 -1853
- build/lib/siat/esg.py +0 -536
- build/lib/siat/event_study.py +0 -815
- build/lib/siat/fama_french.py +0 -1521
- build/lib/siat/fin_stmt2_yahoo.py +0 -982
- build/lib/siat/financial_base.py +0 -1160
- build/lib/siat/financial_statements.py +0 -598
- build/lib/siat/financials.py +0 -2339
- build/lib/siat/financials2.py +0 -1278
- build/lib/siat/financials_china.py +0 -4433
- build/lib/siat/financials_china2.py +0 -2212
- build/lib/siat/fund.py +0 -629
- build/lib/siat/fund_china.py +0 -3307
- build/lib/siat/future_china.py +0 -551
- build/lib/siat/google_authenticator.py +0 -47
- build/lib/siat/grafix.py +0 -3636
- build/lib/siat/holding_risk.py +0 -867
- build/lib/siat/luchy_draw.py +0 -638
- build/lib/siat/market_china.py +0 -1168
- build/lib/siat/markowitz.py +0 -2363
- build/lib/siat/markowitz2.py +0 -3150
- build/lib/siat/markowitz2_20250704.py +0 -2969
- build/lib/siat/markowitz2_20250705.py +0 -3158
- build/lib/siat/markowitz_simple.py +0 -373
- build/lib/siat/ml_cases.py +0 -2291
- build/lib/siat/ml_cases_example.py +0 -60
- build/lib/siat/option_china.py +0 -3069
- build/lib/siat/option_pricing.py +0 -1925
- build/lib/siat/other_indexes.py +0 -409
- build/lib/siat/risk_adjusted_return.py +0 -1576
- build/lib/siat/risk_adjusted_return2.py +0 -1900
- build/lib/siat/risk_evaluation.py +0 -2218
- build/lib/siat/risk_free_rate.py +0 -351
- build/lib/siat/sector_china.py +0 -4140
- build/lib/siat/security_price2.py +0 -727
- build/lib/siat/security_prices.py +0 -3408
- build/lib/siat/security_trend.py +0 -402
- build/lib/siat/security_trend2.py +0 -646
- build/lib/siat/stock.py +0 -4284
- build/lib/siat/stock_advice_linear.py +0 -934
- build/lib/siat/stock_base.py +0 -26
- build/lib/siat/stock_china.py +0 -2095
- build/lib/siat/stock_prices_kneighbors.py +0 -910
- build/lib/siat/stock_prices_linear.py +0 -386
- build/lib/siat/stock_profile.py +0 -707
- build/lib/siat/stock_technical.py +0 -3305
- build/lib/siat/stooq.py +0 -74
- build/lib/siat/transaction.py +0 -347
- build/lib/siat/translate.py +0 -5183
- build/lib/siat/valuation.py +0 -1378
- build/lib/siat/valuation_china.py +0 -2076
- build/lib/siat/var_model_validation.py +0 -444
- build/lib/siat/yf_name.py +0 -811
- siat-3.10.132.dist-info/RECORD +0 -218
siat/save2docx.py
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""
|
3
|
+
本模块功能:转换ipynb文件为docx,带有目录,代码行加边框,图像适配页宽。
|
4
|
+
注意:
|
5
|
+
需要安装pandoc并将其路径加入操作系统的PATH。
|
6
|
+
可在Anaconda Prompt或macOS Terminal下输入pandoc尝试,若未加入PATH则提示找不到。
|
7
|
+
尚存问题:
|
8
|
+
1. 标题行未居中,且重复生成;
|
9
|
+
2. 目录页码不准确,需要手动更新;
|
10
|
+
3. 若docx文件已打开出错。
|
11
|
+
所属工具包:证券投资分析工具SIAT
|
12
|
+
SIAT:Security Investment Analysis Tool
|
13
|
+
创建日期:2025年7月8日
|
14
|
+
最新修订日期:2025年7月8日
|
15
|
+
作者:王德宏 (WANG Dehong, Peter)
|
16
|
+
作者单位:北京外国语大学国际商学院
|
17
|
+
作者邮件:wdehong2000@163.com
|
18
|
+
版权所有:王德宏
|
19
|
+
用途限制:仅限研究与教学使用。
|
20
|
+
特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
|
21
|
+
"""
|
22
|
+
|
23
|
+
#==============================================================================
|
24
|
+
|
25
|
+
import os
|
26
|
+
import errno
|
27
|
+
import tempfile
|
28
|
+
import subprocess
|
29
|
+
|
30
|
+
import nbformat
|
31
|
+
from nbconvert import HTMLExporter
|
32
|
+
import pypandoc
|
33
|
+
|
34
|
+
from docx import Document
|
35
|
+
from docx.oxml import OxmlElement
|
36
|
+
from docx.oxml.ns import qn
|
37
|
+
from docx.shared import Mm
|
38
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
39
|
+
from docx.enum.table import WD_TABLE_ALIGNMENT
|
40
|
+
from docx.text.paragraph import Paragraph
|
41
|
+
|
42
|
+
# 预设纸张尺寸(单位:毫米)
|
43
|
+
PAGE_SIZES = {"A4": (210, 297), "A3": (297, 420)}
|
44
|
+
|
45
|
+
|
46
|
+
def _add_border_to_paragraph(paragraph):
    """Draw a single-line box border around all four sides of ``paragraph``.

    A ``w:pBdr`` element holding one child per side (top, left, bottom,
    right) is appended to the paragraph's properties element.
    """
    # Attributes shared by every side of the box.
    side_attrs = (
        ('w:val', 'single'),
        ('w:sz', '4'),
        ('w:space', '4'),
        ('w:color', 'auto'),
    )

    props = paragraph._p.get_or_add_pPr()
    box = OxmlElement('w:pBdr')
    for side in ('top', 'left', 'bottom', 'right'):
        line = OxmlElement(f'w:{side}')
        for attr_name, attr_value in side_attrs:
            line.set(qn(attr_name), attr_value)
        box.append(line)
    props.append(box)
|
59
|
+
|
60
|
+
|
61
|
+
def _insert_native_toc(after_paragraph):
    """Insert a Word TOC field paragraph directly after ``after_paragraph``.

    The new paragraph holds five runs, in order: field begin, the field
    instruction (``TOC`` covering outline levels 1-9), field separator,
    a placeholder text run, and field end.
    """

    def _wrap_in_run(child):
        # Every field component lives inside its own <w:r>.
        run = OxmlElement('w:r')
        run.append(child)
        return run

    def _field_char(kind):
        marker = OxmlElement('w:fldChar')
        marker.set(qn('w:fldCharType'), kind)
        return marker

    toc_paragraph = OxmlElement('w:p')

    # 1) field begin
    toc_paragraph.append(_wrap_in_run(_field_char('begin')))

    # 2) field instruction
    instruction = OxmlElement('w:instrText')
    instruction.set(qn('xml:space'), 'preserve')
    instruction.text = 'TOC \\o "1-9" \\h \\z \\u'
    toc_paragraph.append(_wrap_in_run(instruction))

    # 3) field separator
    toc_paragraph.append(_wrap_in_run(_field_char('separate')))

    # 4) placeholder text shown until the field is updated
    placeholder = OxmlElement('w:t')
    placeholder.text = '右击此处更新目录'
    toc_paragraph.append(_wrap_in_run(placeholder))

    # 5) field end
    toc_paragraph.append(_wrap_in_run(_field_char('end')))

    # Place the finished paragraph as the next sibling of the anchor.
    after_paragraph._p.addnext(toc_paragraph)
|
107
|
+
|
108
|
+
|
109
|
+
def convert_ipynb_to_docx(ipynb_path, docx_path=None, page_size="A3"):
    """Convert a Jupyter notebook to a .docx file and post-process it.

    Steps performed:
      1. The first line of the first Markdown cell becomes the document
         title (falling back to the notebook's file name).
      2. A Word TOC field (levels 1-9) is inserted right after the title.
      3. Every heading paragraph is left-aligned.
      4. Paragraphs in a "code" style whose text equals the source of a
         notebook code cell get a box border (cell outputs do not).
      5. Tables are centred with equal column widths; inline images are
         scaled to the usable page width and their paragraph is centred.
      6. If the target file cannot be opened/written because of a
         permission error, a ``PermissionError`` asking the user to close
         the file is raised.

    Args:
        ipynb_path: Path of the notebook to convert.
        docx_path: Output path; defaults to ``ipynb_path`` with a
            ``.docx`` extension.
        page_size: Key of ``PAGE_SIZES`` (case-insensitive). Unknown
            values leave the page size produced by Pandoc untouched.

    Returns:
        The path of the written .docx file.

    Raises:
        FileNotFoundError: ``ipynb_path`` does not exist.
        PermissionError: the output file is locked / not writable.
        RuntimeError: the external ``pandoc`` fallback failed.
    """
    # ---- 1. Validate input and derive the output path ----
    if not os.path.isfile(ipynb_path):
        raise FileNotFoundError(f"找不到输入文件:{ipynb_path}")
    if docx_path is None:
        base, _ = os.path.splitext(ipynb_path)
        docx_path = base + ".docx"

    # Local import: only this function needs it.
    from html import escape as _escape_html

    # ---- 2. Read the notebook, extract the title, collect code sources ----
    nb = nbformat.read(ipynb_path, as_version=4)
    title = None
    code_blocks = []
    for cell in nb.cells:
        if cell.cell_type == "markdown" and title is None:
            lines = cell.source.strip().splitlines()
            if lines:
                title = lines[0].lstrip("# ").strip()
                # Drop the title line so it is not rendered a second time.
                cell.source = "\n".join(lines[1:]).strip()
        if cell.cell_type == "code":
            code_blocks.append(cell.source.rstrip())
    if not title:
        title = os.path.splitext(os.path.basename(ipynb_path))[0]
    # Set membership instead of a linear scan for every paragraph.
    code_sources = set(code_blocks)

    # ---- 3. Make sure Pandoc is available ----
    try:
        pypandoc.get_pandoc_version()
    except OSError:
        pypandoc.download_pandoc()

    # ---- 4. Notebook -> HTML (images embedded) ----
    exporter = HTMLExporter()
    exporter.embed_images = True
    html_body, _ = exporter.from_notebook_node(nb)
    # Escape the title: it is plain text and may contain '<', '>' or '&'.
    html_text = f"<h1>{_escape_html(title)}</h1>\n" + html_body

    # ---- 5. HTML -> DOCX via pypandoc, falling back to the pandoc CLI ----
    try:
        pypandoc.convert_text(
            html_text, to="docx", format="html",
            outputfile=docx_path, encoding="utf-8"
        )
    except Exception:
        # Deliberate broad catch: any pypandoc failure triggers the fallback.
        with tempfile.NamedTemporaryFile("w", suffix=".html",
                                         delete=False,
                                         encoding="utf-8") as tmp:
            tmp.write(html_text)
            html_file = tmp.name
        try:
            try:
                subprocess.run(
                    ["pandoc", html_file, "-f", "html", "-t", "docx", "-o", docx_path],
                    check=True, capture_output=True
                )
            except subprocess.CalledProcessError as e:
                err = e.stderr.decode("utf-8", errors="ignore")
                low = err.lower()
                if "permission denied" in low or "could not open file" in low:
                    raise PermissionError(
                        f"无法写入 {docx_path}:文件可能已被打开,请先关闭后重试。"
                    ) from e
                raise RuntimeError(f"Pandoc 转换失败:{err}") from e
        finally:
            os.remove(html_file)

    # ---- 6. Post-process the DOCX ----
    try:
        doc = Document(docx_path)
    except (PermissionError, OSError) as e:
        if getattr(e, "errno", None) in (errno.EACCES, errno.EPERM):
            raise PermissionError(
                f"无法打开 {docx_path}:文件可能已被打开,请先关闭后重试。"
            ) from e
        raise

    # 6.1 Page size and margins
    sec = doc.sections[0]
    size_key = page_size.upper()
    if size_key in PAGE_SIZES:
        w_mm, h_mm = PAGE_SIZES[size_key]
        sec.page_width, sec.page_height = Mm(w_mm), Mm(h_mm)
    for m in ("left_margin", "right_margin", "top_margin", "bottom_margin"):
        setattr(sec, m, Mm(25.4))
    avail_w = sec.page_width - sec.left_margin - sec.right_margin

    # 6.2 Replace the first paragraph with the title (Heading 1, left-aligned)
    p0 = doc.paragraphs[0]
    p0.text = title
    p0.style = doc.styles["Heading 1"]
    p0.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # 6.3 Insert the native Word TOC right after the title
    _insert_native_toc(p0)

    # 6.4 Ask Word to refresh fields (the TOC) when the file is opened
    try:
        settings = doc.settings.element
        upd = OxmlElement("w:updateFields")
        upd.set(qn("w:val"), "true")
        settings.append(upd)
    except Exception:
        pass  # best effort: some python-docx versions lack this interface

    # 6.5 Left-align every heading
    for p in doc.paragraphs:
        if p.style.name.startswith("Heading"):
            p.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # 6.6 Border only the paragraphs that hold original code-cell source
    for p in doc.paragraphs:
        if "code" in p.style.name.lower():
            if p.text.rstrip() in code_sources:
                _add_border_to_paragraph(p)

    # 6.7 Tables: centred, equal column widths
    for tbl in doc.tables:
        tbl.alignment = WD_TABLE_ALIGNMENT.CENTER
        tbl.allow_autofit = False
        cols = len(tbl.columns) or 1
        col_w = avail_w // cols
        for col in tbl.columns:
            for cell in col.cells:
                cell.width = col_w

    # 6.8 Images: scale to the usable page width and centre the paragraph
    paragraph_tag = qn("w:p")
    for shp in doc.inline_shapes:
        ow, oh = shp.width, shp.height
        if not ow:
            continue  # a zero-width shape cannot be scaled proportionally
        fact = avail_w / ow
        shp.width = avail_w
        shp.height = int(oh * fact)
        # Climb from the inline element to its enclosing <w:p>; a fixed
        # number of getparent() hops lands on the run, not the paragraph.
        node = shp._inline.getparent()
        while node is not None and node.tag != paragraph_tag:
            node = node.getparent()
        if node is not None:
            Paragraph(node, doc).alignment = WD_ALIGN_PARAGRAPH.CENTER

    # ---- 7. Save, translating lock/permission failures ----
    try:
        doc.save(docx_path)
    except (PermissionError, OSError) as e:
        if getattr(e, "errno", None) in (errno.EACCES, errno.EPERM):
            raise PermissionError(
                f"无法写入 {docx_path}:文件可能已被打开,请先关闭后重试。"
            ) from e
        raise

    return docx_path
|
263
|
+
|
264
|
+
|
265
|
+
#==============================================================================
|
266
|
+
import os
|
267
|
+
import sys
|
268
|
+
import psutil
|
269
|
+
|
270
|
+
def is_file_opened(file_path: str) -> bool:
    """Report whether ``file_path`` appears to be held open by another program.

    Three checks are tried in order:
      1. Open the file in append mode; success means "not in use".
      2. Scan the open files of running processes with ``psutil``.
      3. Try to take a non-blocking lock on the file.

    :param file_path: path of the file to test
    :return: True if the file looks occupied, False if it is free or
        does not exist
    """
    if not os.path.exists(file_path):
        return False

    abs_path = os.path.abspath(file_path)

    # Method 1: quick probe. Append mode does not alter existing content.
    try:
        with open(abs_path, "a"):
            pass
        return False  # opened successfully, so the file is not locked
    except OSError:
        pass  # fall through to the slower checks

    # Method 2: process scan. Compare normalised paths so that case
    # differences and symlinked locations still match.
    wanted = {
        os.path.normcase(abs_path),
        os.path.normcase(os.path.realpath(abs_path)),
    }
    try:
        for proc in psutil.process_iter(['pid', 'name', 'open_files']):
            try:
                open_files = proc.info.get('open_files')
                if open_files and any(
                    os.path.normcase(f.path) in wanted for f in open_files
                ):
                    return True
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
    except NameError:  # psutil is not available
        pass

    # Method 3: last resort, probe with a non-blocking file lock.
    try:
        if sys.platform == 'win32':
            import msvcrt
            with open(abs_path, "a") as f:
                msvcrt.locking(f.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            with open(abs_path, "a") as f:
                fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return False
    except (OSError, ImportError):
        # Every probe failed: treat the file as occupied.
        return True
|
316
|
+
|
317
|
+
#==============================================================================
|
318
|
+
|
319
|
+
def ipynb2docx(ipynb_path, page_size="A3"):
    """Convert a notebook to a .docx next to it and print a short report.

    The output path is the notebook path with a ``.docx`` extension. If
    that file is detected as open in another program, a warning is
    printed and nothing is converted. Otherwise the conversion is
    delegated to ``convert_ipynb_to_docx`` and three status lines are
    printed. Always returns ``None``.
    """
    stem = os.path.splitext(ipynb_path)[0]
    docx_path = stem + ".docx"

    # Convert only when the target is not held open by another program.
    if not is_file_opened(docx_path):
        result = convert_ipynb_to_docx(ipynb_path, docx_path=None, page_size=page_size)
        for message in (
            f"{result} created with TOC in {page_size} size",
            "However, TOC needs update manually in Microsoft Word",
            "And, title and some other things may need fine tuned as well",
        ):
            print(message)
        return

    print(f"Warning: {docx_path} occupied by other app, please close it and try again")
    return
|
344
|
+
|
345
|
+
|
siat/save2pdf.py
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""
|
3
|
+
本模块功能:转换ipynb文件为pdf,带有可跳转的目录(目前一级标题定位还不准确,二级以下目录定位较准确,但已可用)
|
4
|
+
所属工具包:证券投资分析工具SIAT
|
5
|
+
SIAT:Security Investment Analysis Tool
|
6
|
+
创建日期:2025年7月8日
|
7
|
+
最新修订日期:2025年7月8日
|
8
|
+
作者:王德宏 (WANG Dehong, Peter)
|
9
|
+
作者单位:北京外国语大学国际商学院
|
10
|
+
作者邮件:wdehong2000@163.com
|
11
|
+
版权所有:王德宏
|
12
|
+
用途限制:仅限研究与教学使用。
|
13
|
+
特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
|
14
|
+
"""
|
15
|
+
|
16
|
+
#==============================================================================
|
17
|
+
|
18
|
+
# 首次运行前,请安装依赖:
|
19
|
+
# !pip install nbformat nbconvert playwright pymupdf nest_asyncio
|
20
|
+
# !playwright install
|
21
|
+
|
22
|
+
import os
|
23
|
+
import re
|
24
|
+
import tempfile
|
25
|
+
import asyncio
|
26
|
+
|
27
|
+
import nest_asyncio
|
28
|
+
import nbformat
|
29
|
+
from nbconvert import HTMLExporter
|
30
|
+
from playwright.async_api import async_playwright
|
31
|
+
import fitz # PyMuPDF
|
32
|
+
|
33
|
+
nest_asyncio.apply() # 使 asyncio.run 在 Notebook 中可用
|
34
|
+
|
35
|
+
def ipynb2pdf(ipynb_path: str) -> None:
    """Convert a notebook to a PDF with navigable bookmarks.

    The PDF is written next to the notebook, with the notebook's
    extension replaced by ``.pdf``. Two status lines are printed on
    success. Nothing is returned.

    Raises:
        FileNotFoundError: ``ipynb_path`` does not exist.
    """
    if not os.path.isfile(ipynb_path):
        raise FileNotFoundError(f"找不到文件:{ipynb_path}")
    # splitext handles any extension length, unlike a fixed [:-6] slice.
    output_pdf = os.path.splitext(ipynb_path)[0] + ".pdf"

    # 1. Read the notebook and extract the heading structure
    nb = nbformat.read(ipynb_path, as_version=4)
    toc = _extract_toc(nb)

    # 2. Notebook -> HTML
    exporter = HTMLExporter()
    html_body, _ = exporter.from_notebook_node(nb)

    html_path = None
    tmp_pdf = None
    try:
        # 3. Temporary HTML input and PDF output files
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as th:
            html_path = th.name
            th.write(html_body)
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tp:
            tmp_pdf = tp.name

        # 4. Render HTML -> PDF with Playwright
        asyncio.run(_html_to_pdf(html_path, tmp_pdf))

        # 5. Add bookmarks with PyMuPDF
        _add_bookmarks(tmp_pdf, output_pdf, toc)
    finally:
        # 6. Clean up even when rendering or bookmarking failed. Removal
        #    is best effort so it cannot mask the original error.
        for leftover in (html_path, tmp_pdf):
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                pass

    from pathlib import Path
    full_path = Path(output_pdf)
    filename = full_path.name       # file name only
    directory = full_path.parent    # containing directory

    print(f"✅ {filename} is created with TOC")
    print(f"✅ It is in {directory}")

    return
|
81
|
+
|
82
|
+
async def _html_to_pdf(html_path: str, pdf_path: str):
    """Render the HTML file at ``html_path`` to an A3 PDF at ``pdf_path``.

    Uses a Chromium instance launched through Playwright, with
    backgrounds printed and 20 mm margins on every side.
    """
    from pathlib import Path

    # as_uri() yields a well-formed file:// URL, including for Windows
    # drive paths and paths with spaces or non-ASCII characters.
    page_url = Path(html_path).resolve().as_uri()

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(page_url)
            await page.pdf(
                path=pdf_path,
                format="A3",
                print_background=True,
                margin={"top":"20mm","bottom":"20mm","left":"20mm","right":"20mm"},
            )
        finally:
            # Close the browser even if navigation or printing failed.
            await browser.close()
|
95
|
+
|
96
|
+
def _extract_toc(nb_node) -> list[tuple[int,str]]:
|
97
|
+
"""
|
98
|
+
从每个 markdown 单元首行提取 # 级别和标题文本,
|
99
|
+
返回 [(level, title), …]
|
100
|
+
"""
|
101
|
+
toc = []
|
102
|
+
for cell in nb_node.cells:
|
103
|
+
if cell.cell_type != "markdown":
|
104
|
+
continue
|
105
|
+
first = cell.source.strip().splitlines()[0]
|
106
|
+
m = re.match(r"^(#{1,6})\s+(.*)", first)
|
107
|
+
if m:
|
108
|
+
toc.append((len(m.group(1)), m.group(2).strip()))
|
109
|
+
return toc
|
110
|
+
|
111
|
+
def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int,str]]):
    """Write ``input_pdf`` to ``output_pdf`` with bookmarks built from ``toc``.

    Each ``(level, title)`` entry points at the first page whose text
    contains ``title`` (page 1 when it is not found anywhere).

    Levels are clamped so the outline starts at level 1 and never goes
    more than one level deeper than the previous entry, which is the
    hierarchy ``set_toc`` accepts.
    """
    # The context manager closes the document, so callers can delete
    # ``input_pdf`` afterwards.
    with fitz.open(input_pdf) as doc:
        # Extract each page's text once rather than once per title.
        page_texts = [doc.load_page(i).get_text() for i in range(doc.page_count)]

        outline = []
        previous_level = 0
        for level, title in toc:
            level = min(level, previous_level + 1)
            previous_level = level
            # Page numbers in the outline are 1-based.
            page_num = next(
                (i + 1 for i, text in enumerate(page_texts) if title in text),
                1,
            )
            outline.append([level, title, page_num])

        # Set all bookmarks in one call, then save.
        doc.set_toc(outline)
        doc.save(output_pdf)
|
130
|
+
|
131
|
+
# 使用示例(另起一个 cell 运行):
|
132
|
+
# ipynb = globals().get("__session__")
|
133
|
+
# ipynb2pdf(ipynb)
|
134
|
+
|
135
|
+
|
136
|
+
#==============================================================================
|
137
|
+
|
138
|
+
#==============================================================================
|
139
|
+
#==============================================================================
|
140
|
+
#==============================================================================
|
141
|
+
#==============================================================================
|
142
|
+
#==============================================================================
|
143
|
+
#==============================================================================
|
144
|
+
#==============================================================================
|
145
|
+
#==============================================================================
|
siat/sector_china.py
CHANGED
File without changes
|
siat/security_price2.py
CHANGED
File without changes
|