siat 3.11.3__py3-none-any.whl → 3.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
siat/common.py CHANGED
@@ -2889,7 +2889,7 @@ if __name__=='__main__':
2889
2889
  file='stooq.py'
2890
2890
  package='pandas_datareader'
2891
2891
 
2892
- def fix_package(file='stooq.py',package='pandas_datareader'):
2892
+ def fix_package_x(file='stooq.py',package='pandas_datareader'):
2893
2893
  """
2894
2894
  功能:修复stooq.py,使用siat包中的stooq.py覆盖pandas_datareader中的同名文件
2895
2895
  注意:执行本程序需要系统管理员权限,可以系统管理员权限启动Jupyter或Spyder
@@ -2899,6 +2899,7 @@ def fix_package(file='stooq.py',package='pandas_datareader'):
2899
2899
  stooq.py pandas_datareader
2900
2900
  bond_zh_sina.py akshare
2901
2901
 
2902
+ 注意:在Python 3.13出错,暂时废弃!
2902
2903
  """
2903
2904
  #判断操作系统
2904
2905
  import sys; czxt=sys.platform
@@ -2923,7 +2924,7 @@ def fix_package(file='stooq.py',package='pandas_datareader'):
2923
2924
 
2924
2925
  #目标地址
2925
2926
  cmdstr1='import '+package
2926
- exec(cmdstr1) #无返回值地执行字符串代码
2927
+ exec(cmdstr1) #无返回值地执行字符串代码,此句在Python 3.13后台不管用了!
2927
2928
  #import pandas_datareader
2928
2929
  #objpath=pandas_datareader.__path__[0]
2929
2930
  cmdstr2=package+'.__path__[0]'
@@ -2957,6 +2958,54 @@ def fix_package(file='stooq.py',package='pandas_datareader'):
2957
2958
 
2958
2959
  return
2959
2960
 
2961
def fix_package(file='stooq.py', package='pandas_datareader'):
    """
    Overwrite a file inside an installed package with the copy bundled in siat.

    Example: copy siat's stooq.py over the file of the same name in
    pandas_datareader.

    Args:
        file: Name of the file to copy; it is looked up in siat's package
            directory and written under the same name in the target package.
        package: Importable name of the package whose file is overwritten.

    Returns:
        None. Success and failure are reported with print(), not raised.

    Note:
        Writing into an installed package may require administrator rights,
        e.g. start Jupyter or Spyder as administrator.
    """
    import sys
    import importlib.util
    from shutil import copyfile

    def _package_dir(name):
        """Return the first directory of package `name`, or None if unavailable."""
        # find_spec() raises instead of returning None when the parent of a
        # dotted name is missing (ModuleNotFoundError) or when an already
        # loaded module has no __spec__ (ValueError).
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            return None
        if spec is None or not spec.submodule_search_locations:
            return None
        location = next(iter(spec.submodule_search_locations), None)
        if location is not None and sys.platform.startswith('win'):
            # Show and use forward slashes on Windows, as the messages below do.
            location = location.replace("\\", "/")
        return location

    # Resolve the destination first so a bad package name is reported cheaply.
    obj_path = _package_dir(package)
    if obj_path is None:
        print(f" #Error(fix_package): Package '{package}' not found.")
        return

    # Locate siat through its spec; importing the whole package is not needed
    # just to learn where it lives.
    src_path = _package_dir('siat')
    if src_path is None:
        print(" #Error(fix_package): Package 'siat' not found.")
        return

    src_file = f"{src_path}/{file}"
    obj_file = f"{obj_path}/{file}"

    try:
        copyfile(src_file, obj_file)
    except IOError as e:
        print(f" #Error(fix_package): Unable to copy file. {e}")
        print(" Solution: manually copy the file", src_file, "to the folder", obj_path)
    except Exception as e:
        print(f" #Error(fix_package): Unexpected error: {e}")
    else:
        print(f" Overrided '{file}' in '{package}'")
        print(" Please RESTART Python kernel before using siat")

    return
2960
3009
 
2961
3010
 
2962
3011
 
@@ -2964,11 +3013,12 @@ def fix_package(file='stooq.py',package='pandas_datareader'):
2964
3013
  if __name__=='__main__':
2965
3014
  file='stock_info.pickle'
2966
3015
  package='siat'
3016
+ mode='read'
2967
3017
  developer=False
2968
3018
 
2969
3019
  file_position()
2970
3020
 
2971
- def file_position(file='stock_info.pickle',package='siat',mode='read'):
3021
+ def file_position_x(file='stock_info.pickle',package='siat',mode='read'):
2972
3022
  """
2973
3023
  功能:给定文件名file,返回其路径
2974
3024
  注意:执行本程序可能需要系统管理员权限,可以系统管理员权限启动Jupyter或Spyder
@@ -2978,6 +3028,7 @@ def file_position(file='stock_info.pickle',package='siat',mode='read'):
2978
3028
  stooq.py pandas_datareader
2979
3029
  bond_zh_sina.py akshare
2980
3030
 
3031
+ 问题:在Python 3.13上后台运行出错,暂时废弃!
2981
3032
  """
2982
3033
  #判断操作系统
2983
3034
  import sys; czxt=sys.platform
@@ -3012,6 +3063,57 @@ def file_position(file='stock_info.pickle',package='siat',mode='read'):
3012
3063
  else:
3013
3064
  return objfile
3014
3065
  #==============================================================================
3066
+ import sys
3067
+ import pickle
3068
+ import importlib
3069
+
3070
def file_position(file='stock_info.pickle', package='siat', mode='read'):
    """
    Locate a file stored inside an installed package and optionally load it.

    Args:
        file: File name, relative to the package directory.
        package: Importable name of the package (or single-file module)
            that holds the file.
        mode: 'read' unpickles the file and returns the loaded object; any
            other value returns the file path as a string.

    Returns:
        The unpickled object when mode == 'read', otherwise the path string.

    Raises:
        ImportError: if the package cannot be imported or has no location
            on disk.
        OSError: in 'read' mode, if the file cannot be opened.
    """
    import os
    import pickle
    import sys
    import importlib

    try:
        pkg = importlib.import_module(package)
    except ImportError as e:
        raise ImportError(f"无法导入包 '{package}': {e}") from e

    # Regular packages expose __path__; single-file modules only have
    # __file__, so fall back to the directory that contains it.
    try:
        objpath = pkg.__path__[0]
    except (AttributeError, IndexError, TypeError):
        module_file = getattr(pkg, '__file__', None)
        if not module_file:
            raise ImportError(f"Package '{package}' has no location on disk")
        # os.path.dirname understands the platform's separator, unlike a
        # manual split on '/'.
        objpath = os.path.dirname(module_file)

    # Every platform other than macOS and Linux is treated as Windows and has
    # its backslashes normalised to forward slashes.
    plat = sys.platform
    if not (plat == 'darwin' or plat.startswith('linux')):
        objpath = objpath.replace('\\', '/')
    objfile = objpath + '/' + file

    if mode == 'read':
        # NOTE: unpickling can execute arbitrary code; only use this on data
        # files that ship with a trusted package.
        with open(objfile, 'rb') as f:
            return pickle.load(f)
    return objfile
3015
3117
 
3016
3118
 
3017
3119
  #==============================================================================
@@ -4848,8 +4950,28 @@ def ttest(sample1,sample2):
4848
4950
  return round(p_value,4)
4849
4951
 
4850
4952
  #==============================================================================
4953
+ import sys,os
4954
+
4955
+ # —— 在程序最开始处,打印并校验 Python 版本 ——
4956
def _check_python_version_jupyter2pdf():
    """
    Report whether the running interpreter can use the jupyter2pdf functions.

    Only Python 3.7 through 3.12 is accepted. On any other version a hint is
    printed that points to ipynb2pdf / ipynb2docx; nothing is raised, so the
    caller decides how to bail out.

    Returns:
        True if the version is supported, False otherwise.
    """
    import sys

    ver = sys.version_info
    usable = (3, 7) <= (ver.major, ver.minor) <= (3, 12)
    if not usable:
        version_str = f"{ver.major}.{ver.minor}.{ver.micro}"
        print(f"不支持的 Python 版本:{version_str},仅支持 Python 3.7–3.12")
        print("解决方案:改用ipynb2pdf或ipynb2docx")

    return usable
4853
4975
 
4854
4976
  async def jupyter2pdf(notebook_path, output_pdf_path, size="A3"):
4855
4977
  """
@@ -4866,6 +4988,9 @@ async def jupyter2pdf(notebook_path, output_pdf_path, size="A3"):
4866
4988
  await convert_notebook_to_pdf(notebook_path, output_pdf_path)
4867
4989
  注意3:notebook_path和output_pdf_path中可以带路径
4868
4990
  """
4991
+ if not _check_python_version_jupyter2pdf():
4992
+ return
4993
+
4869
4994
  size=size.upper()
4870
4995
  if not size in ['A4','A3','letter']:
4871
4996
  size='A3'
@@ -4945,6 +5070,8 @@ async def jupyter2pdf2(notebook_dir, notebook_file):
4945
5070
  注意2:调用本函数的格式是异步await开头,例如:
4946
5071
  await jupyter2pdf2(notebook_dir, notebook_file)
4947
5072
  """
5073
+ if not _check_python_version_jupyter2pdf():
5074
+ return
4948
5075
 
4949
5076
  # 路径分割符号
4950
5077
  if ('/' in notebook_dir) and not ('\\' in notebook_dir):
@@ -5074,6 +5201,9 @@ async def jupyter2pdf3(notebook_path):
5074
5201
  await jupyter2pdf3(notebook_path)
5075
5202
  """
5076
5203
  DEBUG=False
5204
+
5205
+ if not _check_python_version_jupyter2pdf():
5206
+ return
5077
5207
 
5078
5208
  import os,sys
5079
5209
  # 分离目录和文件名
siat/save2docx.py CHANGED
@@ -21,6 +21,8 @@ SIAT:Security Investment Analysis Tool
21
21
  """
22
22
 
23
23
  #==============================================================================
24
+ #关闭所有警告
25
+ import warnings; warnings.filterwarnings('ignore')
24
26
 
25
27
  import os
26
28
  import errno
@@ -39,6 +41,42 @@ from docx.enum.text import WD_ALIGN_PARAGRAPH
39
41
  from docx.enum.table import WD_TABLE_ALIGNMENT
40
42
  from docx.text.paragraph import Paragraph
41
43
 
44
+ import contextlib
45
+ import io
46
+
47
+ import time
48
+ from IPython.display import display, Javascript
49
+
50
+ # —— 新增:Notebook 强制保存 ——
51
+ def _save_current_notebook():
52
+ """
53
+ Ask the browser front end to save the notebook (best effort); the script tries Classic Notebook first, then the JupyterLab command / docmanager hooks.
54
+ """
55
+ js = """
56
+ (function() {
57
+ // Classic Notebook
58
+ if (window.Jupyter && Jupyter.notebook) {
59
+ Jupyter.notebook.save_checkpoint();
60
+ }
61
+ // JupyterLab >=3: 用 app.commands
62
+ else if (window.jupyterapp && jupyterapp.commands) {
63
+ jupyterapp.commands.execute('docmanager:save');
64
+ }
65
+ // JupyterLab <=2 或其他
66
+ else if (window.require) {
67
+ require(['@jupyterlab/docmanager'], function(docManager) {
68
+ docManager.save();
69
+ });
70
+ }
71
+ })();
72
+ """
73
+ try:
74
+ display(Javascript(js))
75
+ time.sleep(0.5) # give the browser a moment to write the notebook to disk
76
+ except Exception: # best effort: a failed save request must not abort the conversion
77
+ pass
78
+
79
+
42
80
  # 预设纸张尺寸(单位:毫米)
43
81
  PAGE_SIZES = {"A4": (210, 297), "A3": (297, 420)}
44
82
 
@@ -116,6 +154,10 @@ def convert_ipynb_to_docx(ipynb_path, docx_path=None, page_size="A3"):
116
154
  5. 表格等分列宽居中;图像放大至页宽并居中
117
155
  6. 若目标 docx 正被打开,抛出提示“请先关闭文件”
118
156
  """
157
+ # 0. 强制保存当前 Notebook
158
+ #print("Saving current ipynb ...")
159
+ _save_current_notebook()
160
+
119
161
  # ---- 1. 检查输入 & 输出路径 ----
120
162
  if not os.path.isfile(ipynb_path):
121
163
  raise FileNotFoundError(f"找不到输入文件:{ipynb_path}")
@@ -148,7 +190,11 @@ def convert_ipynb_to_docx(ipynb_path, docx_path=None, page_size="A3"):
148
190
  # ---- 4. Notebook → HTML(嵌入图像) ----
149
191
  exporter = HTMLExporter()
150
192
  exporter.embed_images = True
151
- html_body, _ = exporter.from_notebook_node(nb)
193
+
194
+ buf = io.StringIO()
195
+ # 屏蔽 stderr
196
+ with contextlib.redirect_stderr(buf):
197
+ html_body, _ = exporter.from_notebook_node(nb)
152
198
  html = f"<h1>{title}</h1>\n" + html_body
153
199
 
154
200
  # ---- 5. HTML → DOCX via Pandoc(或 subprocess fallback) ----
@@ -339,8 +385,7 @@ def ipynb2docx(ipynb_path, page_size="A3"):
339
385
 
340
386
  result = convert_ipynb_to_docx(ipynb_path, docx_path=None, page_size=page_size)
341
387
  print(f"{result} created with TOC in {page_size} size")
342
- print(f"However, TOC needs update manually in Microsoft Word")
343
- print(f"And, title and some other things may need fine tuned as well")
388
+ print(f"However, original title(s) and new TOC may need further adjustment")
344
389
 
345
390
  return
346
391
 
@@ -0,0 +1,147 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 本模块功能:转换ipynb文件为pdf,带有可跳转的目录(目前一级标题定位还不准确,二级以下目录定位较准确,但已可用)
4
+ 所属工具包:证券投资分析工具SIAT
5
+ SIAT:Security Investment Analysis Tool
6
+ 创建日期:2025年7月8日
7
+ 最新修订日期:2025年7月8日
8
+ 作者:王德宏 (WANG Dehong, Peter)
9
+ 作者单位:北京外国语大学国际商学院
10
+ 作者邮件:wdehong2000@163.com
11
+ 版权所有:王德宏
12
+ 用途限制:仅限研究与教学使用。
13
+ 特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
14
+ """
15
+
16
+ #==============================================================================
17
+
18
+ # 首次运行前,请安装依赖:
19
+ # !pip install nbformat nbconvert playwright pymupdf nest_asyncio
20
+ # !playwright install
21
+
22
+ import os
23
+ import re
24
+ import tempfile
25
+ import asyncio
26
+
27
+ import nest_asyncio
28
+ import nbformat
29
+ from nbconvert import HTMLExporter
30
+ from playwright.async_api import async_playwright
31
+ import fitz # PyMuPDF
32
+
33
+ nest_asyncio.apply() # 使 asyncio.run 在 Notebook 中可用
34
+
35
def ipynb2pdf(ipynb_path: str) -> None:
    """
    Convert a .ipynb notebook into a PDF with clickable outline bookmarks.

    The PDF is written next to the notebook, using the same base name.

    Args:
        ipynb_path: Path of the notebook file to convert.

    Returns:
        None. The name and folder of the generated PDF are printed.

    Raises:
        FileNotFoundError: if ipynb_path is not an existing file.
    """
    from pathlib import Path

    if not os.path.isfile(ipynb_path):
        raise FileNotFoundError(f"找不到文件:{ipynb_path}")
    # splitext() drops only a real extension; slicing off six characters
    # would mangle any path that does not end in ".ipynb".
    output_pdf = os.path.splitext(ipynb_path)[0] + ".pdf"

    print("Converting to PDF ...")

    # 1. Read the notebook and collect its heading structure.
    nb = nbformat.read(ipynb_path, as_version=4)
    toc = _extract_toc(nb)

    # 2. Notebook -> HTML.
    exporter = HTMLExporter()
    html_body, _ = exporter.from_notebook_node(nb)

    temp_files = []
    try:
        # 3. Temporary HTML input and PDF output for the renderer.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as th:
            temp_files.append(th.name)
            th.write(html_body)
            html_path = th.name
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tp:
            temp_files.append(tp.name)
            tmp_pdf = tp.name

        # 4. Render HTML -> PDF with Playwright.
        asyncio.run(_html_to_pdf(html_path, tmp_pdf))

        # 5. Add the bookmarks with PyMuPDF.
        _add_bookmarks(tmp_pdf, output_pdf, toc)
    finally:
        # 6. Remove the temporary files even when a step above failed.
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError:
                pass

    full_path = Path(output_pdf)
    print(f"✅ {full_path.name} is created with TOC")
    print(f"✅ It is in {full_path.parent}")

    # Deliberately returns nothing, as before, so a notebook cell ending in
    # this call does not echo the path.
    return
83
+
84
async def _html_to_pdf(html_path: str, pdf_path: str):
    """
    Render a local HTML file to an A3 PDF with Playwright's Chromium.

    Args:
        html_path: Path of the HTML file to load.
        pdf_path: Path where the PDF is written.
    """
    from pathlib import Path

    # as_uri() yields a well-formed, percent-encoded file:// URL; gluing
    # "file://" onto a raw path is fragile when the path contains characters
    # that have a meaning in URLs (e.g. '#', '?', '%').
    html_url = Path(html_path).resolve().as_uri()

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(html_url)
            await page.pdf(
                path=pdf_path,
                format="A3",
                print_background=True,
                margin={"top": "20mm", "bottom": "20mm", "left": "20mm", "right": "20mm"},
            )
        finally:
            # Close the browser even when navigation or printing fails.
            await browser.close()
97
+
98
def _extract_toc(nb_node) -> list[tuple[int, str]]:
    """
    Collect heading entries from the first line of each markdown cell.

    Args:
        nb_node: Notebook object exposing `cells`; each cell has `cell_type`
            and `source`.

    Returns:
        A list of (level, title) tuples, where level is the number of
        leading '#' characters (1-6).
    """
    toc = []
    for cell in nb_node.cells:
        if cell.cell_type != "markdown":
            continue
        lines = cell.source.strip().splitlines()
        if not lines:
            # Empty or whitespace-only cell: indexing [0] would raise
            # IndexError and abort the whole conversion.
            continue
        m = re.match(r"^(#{1,6})\s+(.*)", lines[0])
        if m:
            title = m.group(2).strip()
            if title:
                toc.append((len(m.group(1)), title))
    return toc
112
+
113
def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int, str]]):
    """
    Write outline bookmarks into a PDF using PyMuPDF.

    Each title is mapped to the first page whose text contains it (page 1 if
    it is found nowhere), then the outline is stored with set_toc() and the
    result is saved to output_pdf.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the PDF to write.
        toc: (level, title) tuples in document order.
    """
    doc = fitz.open(input_pdf)
    try:
        # Extract every page's text once instead of once per title.
        page_texts = [doc.load_page(i).get_text() for i in range(doc.page_count)]

        outline = []
        prev_level = 0
        for level, title in toc:
            # set_toc() rejects an outline whose first entry is not level 1
            # or whose level grows by more than one step, so clamp it.
            level = max(1, min(level, prev_level + 1))
            page_num = next(
                (i + 1 for i, text in enumerate(page_texts) if title in text),
                1,
            )
            outline.append([level, title, page_num])
            prev_level = level

        doc.set_toc(outline)
        doc.save(output_pdf)
    finally:
        # Release the file handle so the caller can delete the temporary PDF.
        doc.close()
132
+
133
+ # 使用示例(另起一个 cell 运行):
134
+ # ipynb = globals().get("__session__")
135
+ # ipynb2pdf(ipynb)
136
+
137
+
138
+ #==============================================================================
139
+
140
+ #==============================================================================
141
+ #==============================================================================
142
+ #==============================================================================
143
+ #==============================================================================
144
+ #==============================================================================
145
+ #==============================================================================
146
+ #==============================================================================
147
+ #==============================================================================
@@ -0,0 +1,202 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 本模块功能:转换ipynb文件为pdf,带有可跳转的目录(目前一级标题定位还不准确,二级以下目录定位较准确,但已可用)
4
+ 所属工具包:证券投资分析工具SIAT
5
+ SIAT:Security Investment Analysis Tool
6
+ 创建日期:2025年7月8日
7
+ 最新修订日期:2025年7月8日
8
+ 作者:王德宏 (WANG Dehong, Peter)
9
+ 作者单位:北京外国语大学国际商学院
10
+ 作者邮件:wdehong2000@163.com
11
+ 版权所有:王德宏
12
+ 用途限制:仅限研究与教学使用。
13
+ 特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
14
+ """
15
+
16
+ #==============================================================================
17
+
18
+ # 首次运行前,请安装依赖:
19
+ # !pip install nbformat nbconvert playwright pymupdf nest_asyncio
20
+ # !playwright install
21
+
22
+ # 针对Python 3.13在Windows下的修复
23
+ # 在 Notebook 首格运行:
24
+ import sys, asyncio
25
+
26
+ if sys.platform.startswith("win"):
27
+ # SelectorEventLoop 无法启动 subprocess,改用 ProactorEventLoop
28
+ asyncio.set_event_loop_policy(
29
+ asyncio.WindowsProactorEventLoopPolicy()
30
+ )
31
+
32
+
33
+ # 下面在Python < 3.13可正常运行
34
+ import os
35
+ import re
36
+ import tempfile
37
+ import asyncio
38
+
39
+ import nest_asyncio
40
+ import nbformat
41
+ from nbconvert import HTMLExporter
42
+ from playwright.async_api import async_playwright
43
+ import fitz # PyMuPDF
44
+
45
+ nest_asyncio.apply() # 使 asyncio.run 在 Notebook 中可用
46
+
47
def ipynb2pdf(ipynb_path: str) -> None:
    """
    Convert a .ipynb notebook into a PDF with clickable outline bookmarks.

    The PDF is written next to the notebook, using the same base name.

    Args:
        ipynb_path: Path of the notebook file to convert.

    Returns:
        None. The name and folder of the generated PDF are printed.

    Raises:
        FileNotFoundError: if ipynb_path is not an existing file.
    """
    from pathlib import Path

    if not os.path.isfile(ipynb_path):
        raise FileNotFoundError(f"找不到文件:{ipynb_path}")
    # splitext() drops only a real extension; slicing off six characters
    # would mangle any path that does not end in ".ipynb".
    output_pdf = os.path.splitext(ipynb_path)[0] + ".pdf"

    print("Converting to PDF ...")

    # 1. Read the notebook and collect its heading structure.
    nb = nbformat.read(ipynb_path, as_version=4)
    toc = _extract_toc(nb)

    # 2. Notebook -> HTML.
    exporter = HTMLExporter()
    html_body, _ = exporter.from_notebook_node(nb)

    temp_files = []
    try:
        # 3. Temporary HTML input and PDF output for the renderer.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as th:
            temp_files.append(th.name)
            th.write(html_body)
            html_path = th.name
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tp:
            temp_files.append(tp.name)
            tmp_pdf = tp.name

        # 4. Render HTML -> PDF with Playwright.
        asyncio.run(_html_to_pdf(html_path, tmp_pdf))

        # 5. Add the bookmarks with PyMuPDF.
        _add_bookmarks(tmp_pdf, output_pdf, toc)
    finally:
        # 6. Remove the temporary files even when a step above failed.
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError:
                pass

    full_path = Path(output_pdf)
    print(f"✅ {full_path.name} is created with TOC")
    print(f"✅ It is in {full_path.parent}")

    # Deliberately returns nothing, as before, so a notebook cell ending in
    # this call does not echo the path.
    return
95
+
96
+ #==============================================================================
97
+ """
98
+ # 异步版本1
99
+ async def _html_to_pdf(html_path: str, pdf_path: str):
100
+ async with async_playwright() as p:
101
+ browser = await p.chromium.launch()
102
+ page = await browser.new_page()
103
+ await page.goto(f"file://{html_path}")
104
+ await page.pdf(
105
+ path=pdf_path,
106
+ #format="A4",
107
+ format="A3",
108
+ print_background=True,
109
+ margin={"top":"20mm","bottom":"20mm","left":"20mm","right":"20mm"},
110
+ )
111
+ await browser.close()
112
+ """
113
+ import nest_asyncio
114
+ import asyncio
115
+ from playwright.async_api import async_playwright
116
+
117
+ nest_asyncio.apply()
118
+
119
async def _html_to_pdf(html_path: str, pdf_path: str):
    """
    Render a local HTML file to an A3 PDF with Playwright's Chromium.

    Args:
        html_path: Path of the HTML file to load.
        pdf_path: Path where the PDF is written.
    """
    from pathlib import Path

    # as_uri() yields a well-formed, percent-encoded file:// URL; gluing
    # "file://" onto a raw path is fragile when the path contains characters
    # that have a meaning in URLs (e.g. '#', '?', '%').
    html_url = Path(html_path).resolve().as_uri()

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(html_url)
            await page.pdf(
                path=pdf_path,
                format="A3",
                print_background=True,
                margin={"top": "20mm", "bottom": "20mm", "left": "20mm", "right": "20mm"},
            )
        finally:
            # Close the browser even when navigation or printing fails.
            await browser.close()
131
+
132
+
133
+
134
+ """
135
+ # 同步版本:不能在Jupyter中使用
136
+ from playwright.sync_api import sync_playwright
137
+
138
+ def _html_to_pdf(html_path: str, pdf_path: str):
139
+ with sync_playwright() as p:
140
+ browser = p.chromium.launch()
141
+ page = browser.new_page()
142
+ page.goto(f"file://{html_path}")
143
+ page.pdf(
144
+ path=pdf_path,
145
+ format="A3",
146
+ print_background=True,
147
+ margin={"top": "20mm", "bottom": "20mm", "left": "20mm", "right": "20mm"},
148
+ )
149
+ browser.close()
150
+ """
151
+ #==============================================================================
152
+
153
def _extract_toc(nb_node) -> list[tuple[int, str]]:
    """
    Collect heading entries from the first line of each markdown cell.

    Args:
        nb_node: Notebook object exposing `cells`; each cell has `cell_type`
            and `source`.

    Returns:
        A list of (level, title) tuples, where level is the number of
        leading '#' characters (1-6).
    """
    toc = []
    for cell in nb_node.cells:
        if cell.cell_type != "markdown":
            continue
        lines = cell.source.strip().splitlines()
        if not lines:
            # Empty or whitespace-only cell: indexing [0] would raise
            # IndexError and abort the whole conversion.
            continue
        m = re.match(r"^(#{1,6})\s+(.*)", lines[0])
        if m:
            title = m.group(2).strip()
            if title:
                toc.append((len(m.group(1)), title))
    return toc
167
+
168
def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int, str]]):
    """
    Write outline bookmarks into a PDF using PyMuPDF.

    Each title is mapped to the first page whose text contains it (page 1 if
    it is found nowhere), then the outline is stored with set_toc() and the
    result is saved to output_pdf.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the PDF to write.
        toc: (level, title) tuples in document order.
    """
    doc = fitz.open(input_pdf)
    try:
        # Extract every page's text once instead of once per title.
        page_texts = [doc.load_page(i).get_text() for i in range(doc.page_count)]

        outline = []
        prev_level = 0
        for level, title in toc:
            # set_toc() rejects an outline whose first entry is not level 1
            # or whose level grows by more than one step, so clamp it.
            level = max(1, min(level, prev_level + 1))
            page_num = next(
                (i + 1 for i, text in enumerate(page_texts) if title in text),
                1,
            )
            outline.append([level, title, page_num])
            prev_level = level

        doc.set_toc(outline)
        doc.save(output_pdf)
    finally:
        # Release the file handle so the caller can delete the temporary PDF.
        doc.close()
187
+
188
+ # 使用示例(另起一个 cell 运行):
189
+ # ipynb = globals().get("__session__")
190
+ # ipynb2pdf(ipynb)
191
+
192
+
193
+ #==============================================================================
194
+
195
+ #==============================================================================
196
+ #==============================================================================
197
+ #==============================================================================
198
+ #==============================================================================
199
+ #==============================================================================
200
+ #==============================================================================
201
+ #==============================================================================
202
+ #==============================================================================
@@ -0,0 +1,133 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 本模块功能:转换ipynb文件为pdf,带有可跳转的目录(目前一级标题定位还不准确,二级以下目录定位较准确,但已可用)
4
+ 所属工具包:证券投资分析工具SIAT
5
+ SIAT:Security Investment Analysis Tool
6
+ 创建日期:2025年7月8日
7
+ 最新修订日期:2025年7月8日
8
+ 作者:王德宏 (WANG Dehong, Peter)
9
+ 作者单位:北京外国语大学国际商学院
10
+ 作者邮件:wdehong2000@163.com
11
+ 版权所有:王德宏
12
+ 用途限制:仅限研究与教学使用。
13
+ 特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
14
+ """
15
+
16
+ #==============================================================================
17
+
18
+ # 首次运行前,请安装依赖:
19
+ # !pip install nbformat nbconvert weasyprint pymupdf
20
+ # (this WeasyPrint variant uses neither Playwright nor nest_asyncio, so `playwright install` is not needed)
21
+
22
+ import os
23
+ import re
24
+ import tempfile
25
+ import nbformat
26
+ from nbconvert import HTMLExporter
27
+ from weasyprint import HTML, CSS
28
+ import fitz # PyMuPDF
29
+
30
def ipynb2pdf(ipynb_path: str) -> str:
    """
    Convert a .ipynb notebook into a PDF with clickable outline bookmarks.

    The PDF is written next to the notebook, using the same base name.

    Args:
        ipynb_path: Path of the notebook file to convert.

    Returns:
        Path of the generated PDF file.

    Raises:
        FileNotFoundError: if ipynb_path is not an existing file.
    """
    if not os.path.isfile(ipynb_path):
        raise FileNotFoundError(f"找不到文件:{ipynb_path}")
    # splitext() drops only a real extension; slicing off six characters
    # would mangle any path that does not end in ".ipynb".
    output_pdf = os.path.splitext(ipynb_path)[0] + ".pdf"

    print("📄 正在转换为 PDF ...")

    # 1. Read the notebook and collect its heading structure.
    nb = nbformat.read(ipynb_path, as_version=4)
    toc = _extract_toc(nb)

    # 2. Notebook -> HTML.
    exporter = HTMLExporter()
    html_body, _ = exporter.from_notebook_node(nb)

    temp_files = []
    try:
        # 3. Temporary HTML input and PDF output for the renderer.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as th:
            temp_files.append(th.name)
            th.write(html_body)
            html_path = th.name
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tp:
            temp_files.append(tp.name)
            tmp_pdf = tp.name

        # 4. Render HTML -> PDF with WeasyPrint.
        _html_to_pdf(html_path, tmp_pdf)

        # 5. Add the bookmarks with PyMuPDF.
        _add_bookmarks(tmp_pdf, output_pdf, toc)
    finally:
        # 6. Remove the temporary files even when a step above failed.
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError:
                pass

    print(f"✅ PDF 已生成:{output_pdf}")
    return output_pdf
67
+
68
def _html_to_pdf(html_path: str, pdf_path: str, page_size: str = "A4"):
    """
    Render an HTML file to PDF with WeasyPrint.

    Args:
        html_path: Path of the HTML file to render.
        pdf_path: Path where the PDF is written.
        page_size: Value for the CSS @page `size` property, e.g. "A4"
            (default, the previous fixed value) or "A3".

    Raises:
        ValueError: if page_size contains characters other than letters,
            digits, spaces and dots.
    """
    # page_size is interpolated into a stylesheet, so accept simple tokens only.
    if not re.fullmatch(r"[A-Za-z0-9 .]+", page_size):
        raise ValueError(f"Invalid page size: {page_size!r}")

    page_css = f"""
        @page {{
            size: {page_size};
            margin: 20mm;
        }}
        body {{
            font-family: 'Arial', sans-serif;
            line-height: 1.6;
        }}
    """
    HTML(filename=html_path).write_pdf(
        pdf_path,
        stylesheets=[CSS(string=page_css)],
    )
85
+
86
def _extract_toc(nb_node) -> list[tuple[int, str]]:
    """
    Collect heading entries from the first line of each markdown cell.

    Args:
        nb_node: Notebook object exposing `cells`; each cell has `cell_type`
            and `source`.

    Returns:
        A list of (level, title) tuples, where level is the number of
        leading '#' characters (1-6).
    """
    toc = []
    for cell in nb_node.cells:
        if cell.cell_type != "markdown":
            continue
        lines = cell.source.strip().splitlines()
        if not lines:
            # Empty or whitespace-only cell: indexing [0] would raise
            # IndexError and abort the whole conversion.
            continue
        m = re.match(r"^(#{1,6})\s+(.*)", lines[0])
        if m:
            title = m.group(2).strip()
            if title:
                toc.append((len(m.group(1)), title))
    return toc
100
+
101
def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int, str]]):
    """
    Write outline bookmarks into a PDF using PyMuPDF.

    Each title is mapped to the first page whose text contains it (page 1 if
    it is found nowhere), then the outline is stored with set_toc() and the
    result is saved to output_pdf.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the PDF to write.
        toc: (level, title) tuples in document order.
    """
    doc = fitz.open(input_pdf)
    try:
        # Extract every page's text once instead of once per title.
        page_texts = [doc.load_page(i).get_text() for i in range(doc.page_count)]

        outline = []
        prev_level = 0
        for level, title in toc:
            # set_toc() rejects an outline whose first entry is not level 1
            # or whose level grows by more than one step, so clamp it.
            level = max(1, min(level, prev_level + 1))
            page_num = next(
                (i + 1 for i, text in enumerate(page_texts) if title in text),
                1,
            )
            outline.append([level, title, page_num])
            prev_level = level

        doc.set_toc(outline)
        doc.save(output_pdf)
    finally:
        # Release the file handle so the caller can delete the temporary PDF.
        doc.close()
117
+
118
+
119
+ # 使用示例(另起一个 cell 运行):
120
+ # ipynb = globals().get("__session__")
121
+ # ipynb2pdf(ipynb)
122
+
123
+
124
+ #==============================================================================
125
+
126
+ #==============================================================================
127
+ #==============================================================================
128
+ #==============================================================================
129
+ #==============================================================================
130
+ #==============================================================================
131
+ #==============================================================================
132
+ #==============================================================================
133
+ #==============================================================================
siat/save2pdf.py CHANGED
@@ -19,37 +19,81 @@ SIAT:Security Investment Analysis Tool
19
19
  # !pip install nbformat nbconvert playwright pymupdf nest_asyncio
20
20
  # !playwright install
21
21
 
22
+ #关闭所有警告
23
+ import warnings; warnings.filterwarnings('ignore')
24
+
25
+ # 能够在Python 3.13下运行了!
22
26
  import os
23
27
  import re
28
+ import sys
24
29
  import tempfile
25
- import asyncio
26
-
27
- import nest_asyncio
30
+ import subprocess
28
31
  import nbformat
29
32
  from nbconvert import HTMLExporter
30
- from playwright.async_api import async_playwright
31
- import fitz # PyMuPDF
33
+ import fitz # PyMuPDF
34
+ from pathlib import Path
35
+
36
+ import contextlib
37
+ import io
32
38
 
33
- nest_asyncio.apply() # 使 asyncio.run 在 Notebook 中可用
39
+ import time
40
+ from IPython.display import display, Javascript
34
41
 
42
+ # —— 新增:Notebook 强制保存 ——
43
+ def _save_current_notebook():
44
+ """
45
+ Ask the browser front end to save the notebook (best effort); the script tries Classic Notebook first, then the JupyterLab command / docmanager hooks.
46
+ """
47
+ js = """
48
+ (function() {
49
+ // Classic Notebook
50
+ if (window.Jupyter && Jupyter.notebook) {
51
+ Jupyter.notebook.save_checkpoint();
52
+ }
53
+ // JupyterLab >=3: 用 app.commands
54
+ else if (window.jupyterapp && jupyterapp.commands) {
55
+ jupyterapp.commands.execute('docmanager:save');
56
+ }
57
+ // JupyterLab <=2 或其他
58
+ else if (window.require) {
59
+ require(['@jupyterlab/docmanager'], function(docManager) {
60
+ docManager.save();
61
+ });
62
+ }
63
+ })();
64
+ """
65
+ try:
66
+ display(Javascript(js))
67
+ time.sleep(0.5) # give the browser a moment to write the notebook to disk
68
+ except Exception: # best effort: a failed save request must not abort the conversion
69
+ pass
70
+
71
+
35
72
  def ipynb2pdf(ipynb_path: str) -> str:
36
73
  """
37
74
  将 .ipynb 转为带可跳转目录书签的 PDF。
38
75
  返回生成的 PDF 文件路径。
39
76
  """
77
+ # 0. 强制保存当前 Notebook
78
+ #print("Saving current ipynb ...")
79
+ _save_current_notebook()
80
+
40
81
  if not os.path.isfile(ipynb_path):
41
82
  raise FileNotFoundError(f"找不到文件:{ipynb_path}")
42
83
  output_pdf = ipynb_path[:-6] + ".pdf"
43
84
 
44
- print(f"Converting to PDF ...")
85
+ print("Converting to PDF ...")
45
86
 
46
- # 1. 读 notebook → 提取目录结构
87
+ # 1. 读 notebook → 提取目录
47
88
  nb = nbformat.read(ipynb_path, as_version=4)
48
89
  toc = _extract_toc(nb)
49
90
 
50
- # 2. nb → HTML
91
+ # 2. nb → HTML(同时关闭图 alt 检查错误信息)
51
92
  exporter = HTMLExporter()
52
- html_body, _ = exporter.from_notebook_node(nb)
93
+ buf = io.StringIO()
94
+ # 屏蔽 stderr
95
+ with contextlib.redirect_stderr(buf):
96
+ html_body, _ = exporter.from_notebook_node(nb)
53
97
 
54
98
  # 3. 临时写 HTML / PDF
55
99
  with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as th:
@@ -58,44 +102,43 @@ def ipynb2pdf(ipynb_path: str) -> str:
58
102
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tp:
59
103
  tmp_pdf = tp.name
60
104
 
61
- # 4. Playwright 渲染 HTML → PDF
62
- asyncio.run(_html_to_pdf(html_path, tmp_pdf))
105
+ # 4. Playwright 渲染 HTML → PDF(在子进程中调用 sync API,避开 Jupyter 的 asyncio loop)
106
+ script = f"""
107
+ import sys
108
+ from playwright.sync_api import sync_playwright
109
+
110
+ p = sync_playwright().start()
111
+ browser = p.chromium.launch()
112
+ page = browser.new_page()
113
+ page.goto(r"file://{html_path}")
114
+ page.pdf(
115
+ path=r"{tmp_pdf}",
116
+ format="A3",
117
+ print_background=True,
118
+ margin={{"top":"20mm","bottom":"20mm","left":"20mm","right":"20mm"}}
119
+ )
120
+ browser.close()
121
+ p.stop()
122
+ """
123
+ subprocess.run([sys.executable, "-c", script], check=True)
63
124
 
64
125
  # 5. PyMuPDF 添加书签
65
126
  _add_bookmarks(tmp_pdf, output_pdf, toc)
66
127
 
67
- # 6. 清理
128
+ # 6. 清理临时文件
68
129
  os.unlink(html_path)
69
130
  os.unlink(tmp_pdf)
70
131
 
71
- from pathlib import Path
132
+ # 打印结果
72
133
  full_path = Path(output_pdf)
73
- # 提取文件名
74
- filename = full_path.name # 'report.pdf'
75
- # 提取路径
76
- directory = full_path.parent # PosixPath('/Users/peter/Documents')
77
-
78
- print(f"✅ {filename} is created with TOC")
79
- print(f"✅ It is in {directory}")
80
-
134
+ print(f"✅ {full_path.name} is created with TOC")
135
+ print(f"✅ It is in {full_path.parent}")
136
+
81
137
  #return output_pdf
82
138
  return
83
139
 
84
- async def _html_to_pdf(html_path: str, pdf_path: str):
85
- async with async_playwright() as p:
86
- browser = await p.chromium.launch()
87
- page = await browser.new_page()
88
- await page.goto(f"file://{html_path}")
89
- await page.pdf(
90
- path=pdf_path,
91
- #format="A4",
92
- format="A3",
93
- print_background=True,
94
- margin={"top":"20mm","bottom":"20mm","left":"20mm","right":"20mm"},
95
- )
96
- await browser.close()
97
140
 
98
- def _extract_toc(nb_node) -> list[tuple[int,str]]:
141
+ def _extract_toc(nb_node) -> list[tuple[int, str]]:
99
142
  """
100
143
  从每个 markdown 单元首行提取 # 级别和标题文本,
101
144
  返回 [(level, title), …]
@@ -110,7 +153,8 @@ def _extract_toc(nb_node) -> list[tuple[int,str]]:
110
153
  toc.append((len(m.group(1)), m.group(2).strip()))
111
154
  return toc
112
155
 
113
- def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int,str]]):
156
+
157
+ def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int, str]]):
114
158
  """
115
159
  用 PyMuPDF 打开临时 PDF,按 toc 列表查找页码,
116
160
  然后用 set_toc() 批量写入书签。
@@ -118,18 +162,18 @@ def _add_bookmarks(input_pdf: str, output_pdf: str, toc: list[tuple[int,str]]):
118
162
  doc = fitz.open(input_pdf)
119
163
  outline = []
120
164
  for level, title in toc:
121
- page_num = 1
122
165
  # 搜索标题出现在第几页(0-based → +1)
123
- for i in range(doc.page_count):
124
- if title in doc.load_page(i).get_text():
125
- page_num = i + 1
126
- break
166
+ page_num = next(
167
+ (i+1 for i in range(doc.page_count)
168
+ if title in doc.load_page(i).get_text()),
169
+ 1
170
+ )
127
171
  outline.append([level, title, page_num])
128
172
 
129
- # 批量设置目录书签
130
173
  doc.set_toc(outline)
131
174
  doc.save(output_pdf)
132
175
 
176
+
133
177
  # 使用示例(另起一个 cell 运行):
134
178
  # ipynb = globals().get("__session__")
135
179
  # ipynb2pdf(ipynb)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: siat
3
- Version: 3.11.3
3
+ Version: 3.11.5
4
4
  Summary: Securities Investment Analysis Tools (siat)
5
5
  Home-page: https://pypi.org/project/siat/
6
6
  Author: Prof. WANG Dehong, International Business School, Beijing Foreign Studies University
@@ -45,10 +45,12 @@ Requires-Dist: ipywidgets
45
45
  Requires-Dist: yahooquery
46
46
  Requires-Dist: alpha-vantage
47
47
  Requires-Dist: tiingo[pandas]
48
- Requires-Dist: numpy <2
48
+ Requires-Dist: numpy<2
49
49
  Requires-Dist: playwright
50
50
  Requires-Dist: pymupdf
51
51
  Requires-Dist: pypandoc
52
+ Requires-Dist: python-docx
53
+ Requires-Dist: weasyprint
52
54
 
53
55
 
54
56
  # What is siat?
@@ -10,7 +10,7 @@ siat/bond_china.py,sha256=WzUhjYYk8tsr3BDWLQcpuj9DqNxTzBSIi_wuAOZ48kY,3082
10
10
  siat/bond_zh_sina.py,sha256=26BohGcS120utwqg9dJvdGm5OkuNpNu5bco80uOuQpU,4423
11
11
  siat/capm_beta.py,sha256=t8-xr90II0JzbjsTOZNpRze_mKTvBRXjwN2o0N0tgD8,30521
12
12
  siat/capm_beta2.py,sha256=S2x6PrWp_1FyzVmG2MVzCf7LlpfHHEJxroJH2b26DvQ,35989
13
- siat/common.py,sha256=GLNRbXP7uDA_pibWXJQ-St0o9ylhvRut0k9KpCQ70bI,193909
13
+ siat/common.py,sha256=L8ZAz0gkJjatm3CQ5mwJTPpATQtbagUPwI-7i3O5p18,198444
14
14
  siat/compare_cross.py,sha256=3iP9TH2h3w27F2ARZc7FjKcErYCzWRc-TPiymOyoVtw,24171
15
15
  siat/copyrights.py,sha256=YMLjZb328YpFMR-s_GUu0HBgeGce3pV7DgRut8S3I7w,690
16
16
  siat/cryptocurrency.py,sha256=QSc4jK9VFlqBWVu-0th1BIMt8wC-5R5sWky3EaNupy0,27940
@@ -50,8 +50,11 @@ siat/risk_adjusted_return.py,sha256=Q4ZRdTF57eNt4QCjeQ7uA8nG56Jls8f_QfJasZQEo0M,
50
50
  siat/risk_adjusted_return2.py,sha256=gCtHhfGNlV1wHqU9gfHJ_n17wRSyTMxc7lS8jgZ-GQk,87409
51
51
  siat/risk_evaluation.py,sha256=xfgLSKlIWYmRJrIL4kn2k2hp9fyOMAzYGIhi9ImvKOw,88917
52
52
  siat/risk_free_rate.py,sha256=IBuRqA2kppdZsW4D4fapW7vnM5HMEXOn95A5r9Pkwlo,12384
53
- siat/save2docx.py,sha256=c43X3IGgfli4gg2VJilRaLzd_KCZX4yF_lLO2S9FvQA,12302
54
- siat/save2pdf.py,sha256=cB1L5lH2n6RfgubCLFR7a617OGnrtT9IQhWXWGncoFs,5114
53
+ siat/save2docx.py,sha256=cqdbPUEAPQD5PcByawMecXhz9ih_lBsWFvfCzGPFDuE,13494
54
+ siat/save2pdf-playwright-20250712.py,sha256=cB1L5lH2n6RfgubCLFR7a617OGnrtT9IQhWXWGncoFs,5114
55
+ siat/save2pdf-playwright-20250714.py,sha256=WPdjT4kjXiAoWri-nyvNlPhvjmQlAHX80qmIT5GGYxs,6785
56
+ siat/save2pdf-weasyprint-20250712.py,sha256=ZVZq5yT-grcmdY3qq8XXZ7OCDCGqvh66o2WfszoK9ws,4570
57
+ siat/save2pdf.py,sha256=YGxFCO6oOBpcyNnJPEc_skdohevUC5Hw5EedeL0HMvI,6136
55
58
  siat/sector_china.py,sha256=uLsDXdRBDVfgG6tnXWnQOTyDmyZfglVO9DRUYU2e3pk,157914
56
59
  siat/security_price2.py,sha256=DDiZ2dlv_TYPLhA8-gGb9i9xrl88r4rgSMEcxqQ6aU0,28065
57
60
  siat/security_prices.py,sha256=X3ip0q_m3OL3QRNRkr_lYQk-wsXLf6dWkFkyoZijhck,129368
@@ -73,8 +76,8 @@ siat/valuation.py,sha256=xGizcKJZ3ADLWWHm2TFQub18FxiDv2doQwBwbEqyqz0,51980
73
76
  siat/valuation_china.py,sha256=eSKIDckyjG8QkENlW_OKkqbQHno8pzDcomBO9iGNJVM,83079
74
77
  siat/var_model_validation.py,sha256=loqziBYO2p0xkeWm3Rb1rJsDhbcgAZ5aR9rBLRwLU5E,17624
75
78
  siat/yf_name.py,sha256=laNKMTZ9hdenGX3IZ7G0a2RLBKEWtUQJFY9CWuk_fp8,24058
76
- siat-3.11.3.dist-info/LICENSE,sha256=NTEMMROY9_4U1szoKC3N2BLHcDd_o5uTgqdVH8tbApw,1071
77
- siat-3.11.3.dist-info/METADATA,sha256=5NpbsFU3Bzzehbm1u5q8EFFX_61QA_ydPkJpXMGpHK8,8533
78
- siat-3.11.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
79
- siat-3.11.3.dist-info/top_level.txt,sha256=X5R8wrVviq8agwJFVRVDsufkuOJuit-1qAT_kXeptrY,17
80
- siat-3.11.3.dist-info/RECORD,,
79
+ siat-3.11.5.dist-info/LICENSE,sha256=NTEMMROY9_4U1szoKC3N2BLHcDd_o5uTgqdVH8tbApw,1071
80
+ siat-3.11.5.dist-info/METADATA,sha256=4sdqgT0GBYVj46u0O_SgiUepBptHRxxTGE9EoFaawg8,8587
81
+ siat-3.11.5.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
82
+ siat-3.11.5.dist-info/top_level.txt,sha256=X5R8wrVviq8agwJFVRVDsufkuOJuit-1qAT_kXeptrY,17
83
+ siat-3.11.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
File without changes