pyxllib 0.3.197__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxllib/__init__.py +21 -21
- pyxllib/algo/__init__.py +8 -8
- pyxllib/algo/disjoint.py +54 -54
- pyxllib/algo/geo.py +541 -541
- pyxllib/algo/intervals.py +964 -964
- pyxllib/algo/matcher.py +389 -389
- pyxllib/algo/newbie.py +166 -166
- pyxllib/algo/pupil.py +629 -629
- pyxllib/algo/shapelylib.py +67 -67
- pyxllib/algo/specialist.py +241 -241
- pyxllib/algo/stat.py +494 -494
- pyxllib/algo/treelib.py +149 -149
- pyxllib/algo/unitlib.py +66 -66
- pyxllib/autogui/__init__.py +5 -5
- pyxllib/autogui/activewin.py +246 -246
- pyxllib/autogui/all.py +9 -9
- pyxllib/autogui/autogui.py +852 -852
- pyxllib/autogui/uiautolib.py +362 -362
- pyxllib/autogui/virtualkey.py +102 -102
- pyxllib/autogui/wechat.py +827 -827
- pyxllib/autogui/wechat_msg.py +421 -421
- pyxllib/autogui/wxautolib.py +84 -84
- pyxllib/cv/__init__.py +5 -5
- pyxllib/cv/expert.py +267 -267
- pyxllib/cv/imfile.py +159 -159
- pyxllib/cv/imhash.py +39 -39
- pyxllib/cv/pupil.py +9 -9
- pyxllib/cv/rgbfmt.py +1525 -1525
- pyxllib/cv/slidercaptcha.py +137 -137
- pyxllib/cv/trackbartools.py +251 -251
- pyxllib/cv/xlcvlib.py +1040 -1040
- pyxllib/cv/xlpillib.py +423 -423
- pyxllib/data/echarts.py +240 -240
- pyxllib/data/jsonlib.py +89 -89
- pyxllib/data/oss.py +72 -72
- pyxllib/data/pglib.py +1127 -1127
- pyxllib/data/sqlite.py +568 -568
- pyxllib/data/sqllib.py +297 -297
- pyxllib/ext/JLineViewer.py +505 -505
- pyxllib/ext/__init__.py +6 -6
- pyxllib/ext/demolib.py +246 -246
- pyxllib/ext/drissionlib.py +277 -277
- pyxllib/ext/kq5034lib.py +12 -12
- pyxllib/ext/old.py +663 -663
- pyxllib/ext/qt.py +449 -449
- pyxllib/ext/robustprocfile.py +497 -497
- pyxllib/ext/seleniumlib.py +76 -76
- pyxllib/ext/tk.py +173 -173
- pyxllib/ext/unixlib.py +827 -827
- pyxllib/ext/utools.py +351 -351
- pyxllib/ext/webhook.py +124 -119
- pyxllib/ext/win32lib.py +40 -40
- pyxllib/ext/wjxlib.py +88 -88
- pyxllib/ext/wpsapi.py +124 -124
- pyxllib/ext/xlwork.py +9 -9
- pyxllib/ext/yuquelib.py +1105 -1105
- pyxllib/file/__init__.py +17 -17
- pyxllib/file/docxlib.py +761 -761
- pyxllib/file/gitlib.py +309 -309
- pyxllib/file/libreoffice.py +165 -165
- pyxllib/file/movielib.py +148 -148
- pyxllib/file/newbie.py +10 -10
- pyxllib/file/onenotelib.py +1469 -1469
- pyxllib/file/packlib/__init__.py +330 -330
- pyxllib/file/packlib/zipfile.py +2441 -2441
- pyxllib/file/pdflib.py +426 -426
- pyxllib/file/pupil.py +185 -185
- pyxllib/file/specialist/__init__.py +685 -685
- pyxllib/file/specialist/dirlib.py +799 -799
- pyxllib/file/specialist/download.py +193 -193
- pyxllib/file/specialist/filelib.py +2829 -2829
- pyxllib/file/xlsxlib.py +3131 -3131
- pyxllib/file/xlsyncfile.py +341 -341
- pyxllib/prog/__init__.py +5 -5
- pyxllib/prog/cachetools.py +64 -64
- pyxllib/prog/deprecatedlib.py +233 -233
- pyxllib/prog/filelock.py +42 -42
- pyxllib/prog/ipyexec.py +253 -253
- pyxllib/prog/multiprogs.py +940 -940
- pyxllib/prog/newbie.py +451 -451
- pyxllib/prog/pupil.py +1197 -1197
- pyxllib/prog/sitepackages.py +33 -33
- pyxllib/prog/specialist/__init__.py +391 -391
- pyxllib/prog/specialist/bc.py +203 -203
- pyxllib/prog/specialist/browser.py +497 -497
- pyxllib/prog/specialist/common.py +347 -347
- pyxllib/prog/specialist/datetime.py +198 -198
- pyxllib/prog/specialist/tictoc.py +240 -240
- pyxllib/prog/specialist/xllog.py +180 -180
- pyxllib/prog/xlosenv.py +108 -108
- pyxllib/stdlib/__init__.py +17 -17
- pyxllib/stdlib/tablepyxl/__init__.py +10 -10
- pyxllib/stdlib/tablepyxl/style.py +303 -303
- pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
- pyxllib/text/__init__.py +8 -8
- pyxllib/text/ahocorasick.py +39 -39
- pyxllib/text/airscript.js +744 -744
- pyxllib/text/charclasslib.py +121 -121
- pyxllib/text/jiebalib.py +267 -267
- pyxllib/text/jinjalib.py +32 -32
- pyxllib/text/jsa_ai_prompt.md +271 -271
- pyxllib/text/jscode.py +922 -922
- pyxllib/text/latex/__init__.py +158 -158
- pyxllib/text/levenshtein.py +303 -303
- pyxllib/text/nestenv.py +1215 -1215
- pyxllib/text/newbie.py +300 -300
- pyxllib/text/pupil/__init__.py +8 -8
- pyxllib/text/pupil/common.py +1121 -1121
- pyxllib/text/pupil/xlalign.py +326 -326
- pyxllib/text/pycode.py +47 -47
- pyxllib/text/specialist/__init__.py +8 -8
- pyxllib/text/specialist/common.py +112 -112
- pyxllib/text/specialist/ptag.py +186 -186
- pyxllib/text/spellchecker.py +172 -172
- pyxllib/text/templates/echart_base.html +10 -10
- pyxllib/text/templates/highlight_code.html +16 -16
- pyxllib/text/templates/latex_editor.html +102 -102
- pyxllib/text/vbacode.py +17 -17
- pyxllib/text/xmllib.py +747 -747
- pyxllib/xl.py +42 -39
- pyxllib/xlcv.py +17 -17
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/METADATA +1 -1
- pyxllib-0.3.200.dist-info/RECORD +126 -0
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/licenses/LICENSE +190 -190
- pyxllib-0.3.197.dist-info/RECORD +0 -126
- {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +0 -0
pyxllib/file/libreoffice.py
CHANGED
@@ -1,165 +1,165 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2024/04/27
|
6
|
-
|
7
|
-
"""
|
8
|
-
libreoffice这个软件相关的功能
|
9
|
-
|
10
|
-
官网:https://www.libreoffice.org/download/download-libreoffice/
|
11
|
-
|
12
|
-
linux的安装:
|
13
|
-
sudo apt-get update
|
14
|
-
sudo apt-get install libreoffice -y
|
15
|
-
"""
|
16
|
-
|
17
|
-
import os
|
18
|
-
from pathlib import Path
|
19
|
-
import subprocess
|
20
|
-
import sys
|
21
|
-
import tempfile
|
22
|
-
from datetime import datetime
|
23
|
-
|
24
|
-
|
25
|
-
def check_libreoffice():
|
26
|
-
""" 检查LibreOffice是否安装 """
|
27
|
-
executor = get_libreoffice_executor()
|
28
|
-
try:
|
29
|
-
subprocess.run([executor, '--version'], check=True)
|
30
|
-
except FileNotFoundError:
|
31
|
-
return False
|
32
|
-
except subprocess.CalledProcessError:
|
33
|
-
return False
|
34
|
-
return True
|
35
|
-
|
36
|
-
|
37
|
-
def get_libreoffice_executor():
|
38
|
-
""" 获得可执行文件的名称 """
|
39
|
-
# 根据所在系统区分
|
40
|
-
if sys.platform == 'win32':
|
41
|
-
return 'soffice.exe'
|
42
|
-
else:
|
43
|
-
return 'libreoffice'
|
44
|
-
|
45
|
-
|
46
|
-
def infer_file_format(file_path):
|
47
|
-
""" 推断文件所属办公文档的'主类' """
|
48
|
-
ext = Path(file_path).suffix.lower()
|
49
|
-
if ext in ['.doc', '.docx']:
|
50
|
-
fmt = 'docx'
|
51
|
-
elif ext in ['.ppt', '.pptx']:
|
52
|
-
fmt = 'pptx'
|
53
|
-
elif ext in ['.xls', '.xlsx']:
|
54
|
-
# todo 其实excel还有xlsm的可能,但是用libreoffice可能没有微软365那样可以判断出宏的情况保存为xlsm
|
55
|
-
fmt = 'xlsx'
|
56
|
-
else:
|
57
|
-
raise ValueError("不支持的文件格式")
|
58
|
-
return fmt
|
59
|
-
|
60
|
-
|
61
|
-
class UpgradeOfficeFile:
|
62
|
-
@classmethod
|
63
|
-
def to_dir(cls, file_path, out_dir=None, fmt=None, timeout=10):
|
64
|
-
""" 将doc文件转换为docx文件
|
65
|
-
|
66
|
-
:param file_path: 待升级的文件路径
|
67
|
-
:param out_dir: 输出文件目录
|
68
|
-
官方接口默认只能设置导出目录,不能设置导出文件名,文件名是跟原始文件一样的
|
69
|
-
:param fmt: 输出文件格式
|
70
|
-
docx, xlsx, pptx
|
71
|
-
"""
|
72
|
-
if isinstance(file_path, Path):
|
73
|
-
file_path = file_path.as_posix()
|
74
|
-
|
75
|
-
# 获取LibreOffice可执行文件的名称
|
76
|
-
executor = get_libreoffice_executor()
|
77
|
-
|
78
|
-
# 如果未指定输出路径,则默认在输入文件的同目录下生成同名的DOCX文件
|
79
|
-
if out_dir is None:
|
80
|
-
out_dir = os.path.dirname(file_path)
|
81
|
-
|
82
|
-
if fmt is None:
|
83
|
-
fmt = infer_file_format(file_path)
|
84
|
-
|
85
|
-
# 构建转换命令
|
86
|
-
command = [
|
87
|
-
executor,
|
88
|
-
'--headless', # 无界面模式
|
89
|
-
'--convert-to', fmt, # 转换为docx格式
|
90
|
-
'--outdir', str(out_dir), # 输出目录
|
91
|
-
file_path # 输入文件路径
|
92
|
-
]
|
93
|
-
|
94
|
-
subprocess.run(command, timeout=timeout, check=True)
|
95
|
-
|
96
|
-
# 返回转换后的文件路径
|
97
|
-
base_name = os.path.basename(file_path)
|
98
|
-
name, _ = os.path.splitext(base_name)
|
99
|
-
new_file_path = os.path.join(out_dir, f"{name}.{fmt}")
|
100
|
-
|
101
|
-
# todo 以目标文件是否存在判断转换是否成功也是有一定bug的,可能目标文件本来就存在
|
102
|
-
# 但如果严谨判断,就要分析subprocess.run的输出结果了,那个太麻烦,先用简便方法处理
|
103
|
-
if not Path(new_file_path).exists():
|
104
|
-
raise ValueError(f"升级文档失败")
|
105
|
-
|
106
|
-
return new_file_path
|
107
|
-
|
108
|
-
@classmethod
|
109
|
-
def to_file(cls, in_file, out_file=None, fmt=None, timeout=10):
|
110
|
-
""" 可以指定转换出的文件名的版本
|
111
|
-
|
112
|
-
:param in_file: 待转换的文件路径
|
113
|
-
:param out_file: 输出文件路径
|
114
|
-
若未指定,默认与原文件同名,在原文件所在目录生成
|
115
|
-
:param fmt: 输出文件格式
|
116
|
-
docx, xlsx, pptx
|
117
|
-
若未指定,则根据in_file的后缀名自动判断
|
118
|
-
"""
|
119
|
-
# 将in_file转换为Path对象
|
120
|
-
in_file = Path(in_file)
|
121
|
-
|
122
|
-
# 如果fmt为None,则根据in_file推断
|
123
|
-
if fmt is None:
|
124
|
-
fmt = infer_file_format(in_file)
|
125
|
-
|
126
|
-
# 如果out_file为None,则默认在原文件目录生成同名的新格式文件
|
127
|
-
if out_file is None:
|
128
|
-
out_file = in_file.with_suffix(f'.{fmt}')
|
129
|
-
else:
|
130
|
-
out_file = Path(out_file)
|
131
|
-
|
132
|
-
# 确保out_file的父目录存在,不存在则创建
|
133
|
-
out_file.parent.mkdir(parents=True, exist_ok=True)
|
134
|
-
|
135
|
-
# 调用upgrade_office_file函数进行转换
|
136
|
-
temp_file = cls.to_dir(in_file, out_dir=out_file.parent, fmt=fmt, timeout=timeout)
|
137
|
-
|
138
|
-
# 将生成的临时文件重命名为out_file
|
139
|
-
os.rename(temp_file, out_file)
|
140
|
-
|
141
|
-
return out_file
|
142
|
-
|
143
|
-
@classmethod
|
144
|
-
def to_tempfile(cls, in_file, fmt=None, *, timestamp_stem=False, create_subdir=False, timeout=10):
|
145
|
-
""" 将文件转换为临时文件
|
146
|
-
|
147
|
-
:param timestamp_stem: 时间戳文件名
|
148
|
-
:param create_subdir: 是否在临时目录中创建新的子目录
|
149
|
-
"""
|
150
|
-
if fmt is None:
|
151
|
-
fmt = infer_file_format(in_file)
|
152
|
-
|
153
|
-
root = Path(tempfile.gettempdir())
|
154
|
-
if create_subdir:
|
155
|
-
root2 = root / datetime.now().strftime('%Y%m%d.%H%M%S.%f')
|
156
|
-
root2.mkdir(parents=True, exist_ok=True)
|
157
|
-
root = root2
|
158
|
-
|
159
|
-
if timestamp_stem:
|
160
|
-
stem = datetime.now().strftime('%Y%m%d.%H%M%S.%f')
|
161
|
-
out_file = cls.to_file(in_file, out_file=root / f"{stem}.{fmt}", fmt=fmt, timeout=timeout)
|
162
|
-
else:
|
163
|
-
out_file = cls.to_dir(in_file, out_dir=root, fmt=fmt, timeout=timeout)
|
164
|
-
|
165
|
-
return out_file
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2024/04/27
|
6
|
+
|
7
|
+
"""
|
8
|
+
libreoffice这个软件相关的功能
|
9
|
+
|
10
|
+
官网:https://www.libreoffice.org/download/download-libreoffice/
|
11
|
+
|
12
|
+
linux的安装:
|
13
|
+
sudo apt-get update
|
14
|
+
sudo apt-get install libreoffice -y
|
15
|
+
"""
|
16
|
+
|
17
|
+
import os
|
18
|
+
from pathlib import Path
|
19
|
+
import subprocess
|
20
|
+
import sys
|
21
|
+
import tempfile
|
22
|
+
from datetime import datetime
|
23
|
+
|
24
|
+
|
25
|
+
def check_libreoffice():
|
26
|
+
""" 检查LibreOffice是否安装 """
|
27
|
+
executor = get_libreoffice_executor()
|
28
|
+
try:
|
29
|
+
subprocess.run([executor, '--version'], check=True)
|
30
|
+
except FileNotFoundError:
|
31
|
+
return False
|
32
|
+
except subprocess.CalledProcessError:
|
33
|
+
return False
|
34
|
+
return True
|
35
|
+
|
36
|
+
|
37
|
+
def get_libreoffice_executor():
|
38
|
+
""" 获得可执行文件的名称 """
|
39
|
+
# 根据所在系统区分
|
40
|
+
if sys.platform == 'win32':
|
41
|
+
return 'soffice.exe'
|
42
|
+
else:
|
43
|
+
return 'libreoffice'
|
44
|
+
|
45
|
+
|
46
|
+
def infer_file_format(file_path):
|
47
|
+
""" 推断文件所属办公文档的'主类' """
|
48
|
+
ext = Path(file_path).suffix.lower()
|
49
|
+
if ext in ['.doc', '.docx']:
|
50
|
+
fmt = 'docx'
|
51
|
+
elif ext in ['.ppt', '.pptx']:
|
52
|
+
fmt = 'pptx'
|
53
|
+
elif ext in ['.xls', '.xlsx']:
|
54
|
+
# todo 其实excel还有xlsm的可能,但是用libreoffice可能没有微软365那样可以判断出宏的情况保存为xlsm
|
55
|
+
fmt = 'xlsx'
|
56
|
+
else:
|
57
|
+
raise ValueError("不支持的文件格式")
|
58
|
+
return fmt
|
59
|
+
|
60
|
+
|
61
|
+
class UpgradeOfficeFile:
|
62
|
+
@classmethod
|
63
|
+
def to_dir(cls, file_path, out_dir=None, fmt=None, timeout=10):
|
64
|
+
""" 将doc文件转换为docx文件
|
65
|
+
|
66
|
+
:param file_path: 待升级的文件路径
|
67
|
+
:param out_dir: 输出文件目录
|
68
|
+
官方接口默认只能设置导出目录,不能设置导出文件名,文件名是跟原始文件一样的
|
69
|
+
:param fmt: 输出文件格式
|
70
|
+
docx, xlsx, pptx
|
71
|
+
"""
|
72
|
+
if isinstance(file_path, Path):
|
73
|
+
file_path = file_path.as_posix()
|
74
|
+
|
75
|
+
# 获取LibreOffice可执行文件的名称
|
76
|
+
executor = get_libreoffice_executor()
|
77
|
+
|
78
|
+
# 如果未指定输出路径,则默认在输入文件的同目录下生成同名的DOCX文件
|
79
|
+
if out_dir is None:
|
80
|
+
out_dir = os.path.dirname(file_path)
|
81
|
+
|
82
|
+
if fmt is None:
|
83
|
+
fmt = infer_file_format(file_path)
|
84
|
+
|
85
|
+
# 构建转换命令
|
86
|
+
command = [
|
87
|
+
executor,
|
88
|
+
'--headless', # 无界面模式
|
89
|
+
'--convert-to', fmt, # 转换为docx格式
|
90
|
+
'--outdir', str(out_dir), # 输出目录
|
91
|
+
file_path # 输入文件路径
|
92
|
+
]
|
93
|
+
|
94
|
+
subprocess.run(command, timeout=timeout, check=True)
|
95
|
+
|
96
|
+
# 返回转换后的文件路径
|
97
|
+
base_name = os.path.basename(file_path)
|
98
|
+
name, _ = os.path.splitext(base_name)
|
99
|
+
new_file_path = os.path.join(out_dir, f"{name}.{fmt}")
|
100
|
+
|
101
|
+
# todo 以目标文件是否存在判断转换是否成功也是有一定bug的,可能目标文件本来就存在
|
102
|
+
# 但如果严谨判断,就要分析subprocess.run的输出结果了,那个太麻烦,先用简便方法处理
|
103
|
+
if not Path(new_file_path).exists():
|
104
|
+
raise ValueError(f"升级文档失败")
|
105
|
+
|
106
|
+
return new_file_path
|
107
|
+
|
108
|
+
@classmethod
|
109
|
+
def to_file(cls, in_file, out_file=None, fmt=None, timeout=10):
|
110
|
+
""" 可以指定转换出的文件名的版本
|
111
|
+
|
112
|
+
:param in_file: 待转换的文件路径
|
113
|
+
:param out_file: 输出文件路径
|
114
|
+
若未指定,默认与原文件同名,在原文件所在目录生成
|
115
|
+
:param fmt: 输出文件格式
|
116
|
+
docx, xlsx, pptx
|
117
|
+
若未指定,则根据in_file的后缀名自动判断
|
118
|
+
"""
|
119
|
+
# 将in_file转换为Path对象
|
120
|
+
in_file = Path(in_file)
|
121
|
+
|
122
|
+
# 如果fmt为None,则根据in_file推断
|
123
|
+
if fmt is None:
|
124
|
+
fmt = infer_file_format(in_file)
|
125
|
+
|
126
|
+
# 如果out_file为None,则默认在原文件目录生成同名的新格式文件
|
127
|
+
if out_file is None:
|
128
|
+
out_file = in_file.with_suffix(f'.{fmt}')
|
129
|
+
else:
|
130
|
+
out_file = Path(out_file)
|
131
|
+
|
132
|
+
# 确保out_file的父目录存在,不存在则创建
|
133
|
+
out_file.parent.mkdir(parents=True, exist_ok=True)
|
134
|
+
|
135
|
+
# 调用upgrade_office_file函数进行转换
|
136
|
+
temp_file = cls.to_dir(in_file, out_dir=out_file.parent, fmt=fmt, timeout=timeout)
|
137
|
+
|
138
|
+
# 将生成的临时文件重命名为out_file
|
139
|
+
os.rename(temp_file, out_file)
|
140
|
+
|
141
|
+
return out_file
|
142
|
+
|
143
|
+
@classmethod
|
144
|
+
def to_tempfile(cls, in_file, fmt=None, *, timestamp_stem=False, create_subdir=False, timeout=10):
|
145
|
+
""" 将文件转换为临时文件
|
146
|
+
|
147
|
+
:param timestamp_stem: 时间戳文件名
|
148
|
+
:param create_subdir: 是否在临时目录中创建新的子目录
|
149
|
+
"""
|
150
|
+
if fmt is None:
|
151
|
+
fmt = infer_file_format(in_file)
|
152
|
+
|
153
|
+
root = Path(tempfile.gettempdir())
|
154
|
+
if create_subdir:
|
155
|
+
root2 = root / datetime.now().strftime('%Y%m%d.%H%M%S.%f')
|
156
|
+
root2.mkdir(parents=True, exist_ok=True)
|
157
|
+
root = root2
|
158
|
+
|
159
|
+
if timestamp_stem:
|
160
|
+
stem = datetime.now().strftime('%Y%m%d.%H%M%S.%f')
|
161
|
+
out_file = cls.to_file(in_file, out_file=root / f"{stem}.{fmt}", fmt=fmt, timeout=timeout)
|
162
|
+
else:
|
163
|
+
out_file = cls.to_dir(in_file, out_dir=root, fmt=fmt, timeout=timeout)
|
164
|
+
|
165
|
+
return out_file
|
pyxllib/file/movielib.py
CHANGED
@@ -1,148 +1,148 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2022/02/25 17:54
|
6
|
-
|
7
|
-
from pyxllib.prog.pupil import check_install_package
|
8
|
-
|
9
|
-
check_install_package('moviepy')
|
10
|
-
|
11
|
-
import cv2
|
12
|
-
from moviepy.editor import VideoFileClip
|
13
|
-
from moviepy.editor import cvsecs
|
14
|
-
import numpy as np
|
15
|
-
from tqdm import tqdm
|
16
|
-
|
17
|
-
from pyxllib.prog.pupil import inject_members
|
18
|
-
from pyxllib.file.specialist import XlPath
|
19
|
-
from pyxllib.cv.imhash import get_init_hash, phash
|
20
|
-
from pyxllib.cv.xlcvlib import xlcv
|
21
|
-
|
22
|
-
|
23
|
-
class XlVideoFileClip(VideoFileClip):
|
24
|
-
|
25
|
-
def get_frame2(self, time_point, *, scale=None):
|
26
|
-
""" 官方获得的图片通道是RGB,但是cv2处理的统一规则是BGR,要转换过来
|
27
|
-
|
28
|
-
:param scale: 获取图片后是否要按统一的比例再缩放一下
|
29
|
-
"""
|
30
|
-
frame = self.get_frame(time_point)
|
31
|
-
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
32
|
-
|
33
|
-
if scale:
|
34
|
-
frame = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
|
35
|
-
|
36
|
-
return frame
|
37
|
-
|
38
|
-
def get_frames(self, time_points=None, *, interval_second=0.1, cur_hash=None, head_frame=None, scale=None,
|
39
|
-
filter_mode=2, print_mode=True):
|
40
|
-
""" 同时获得多帧图片
|
41
|
-
|
42
|
-
:param time_points: 类list对象,元素是时间点(可以是字符串格式,也可以是数值秒数)
|
43
|
-
常见生成方法参考:np.array(0, clip.duration, 0.1)
|
44
|
-
:param filter_mode: 过滤强度
|
45
|
-
因为times如果取的非常密集,比如每0.1秒取一帧,会有很大概率是重复、相近的图
|
46
|
-
此时可以用这个参数控制误差在多大以内的图会保留
|
47
|
-
使用phash判断帧之间相似度,只要误差在该设定阈值内,都会被跳过
|
48
|
-
可以设置0,不过这样过滤程度是最低的
|
49
|
-
也可以输入-1,表示完全不过滤,不计算phash。当这种情况也不太必要用这个函数了,可以直接普通循环。
|
50
|
-
也可以故意设置的特别大(phash最大差距是64),那这样相当于差异很大的也过滤掉了
|
51
|
-
:param head_frame: 提供一张初始图片,主要是配合filter_mode用于去重的
|
52
|
-
:param cur_hash: 类似head_frame作用,但直接提供hash值
|
53
|
-
:return: 使用yield机制,防止图片数据太多,内存爆炸了
|
54
|
-
当filter_mode>=0时,返回 时间点time_point和对应的图片im
|
55
|
-
"""
|
56
|
-
if cur_hash is None:
|
57
|
-
if head_frame is None:
|
58
|
-
cur_hash = get_init_hash()
|
59
|
-
else:
|
60
|
-
cur_hash = phash(head_frame)
|
61
|
-
|
62
|
-
if time_points is None:
|
63
|
-
time_points = np.arange(0, self.duration, interval_second)
|
64
|
-
|
65
|
-
for time_point in tqdm(time_points, disable=not print_mode):
|
66
|
-
im = self.get_frame2(time_point, scale=scale)
|
67
|
-
|
68
|
-
if filter_mode >= 0:
|
69
|
-
last_hash, cur_hash = cur_hash, phash(im)
|
70
|
-
if cur_hash - last_hash <= filter_mode:
|
71
|
-
continue
|
72
|
-
yield time_point, im
|
73
|
-
else:
|
74
|
-
yield im
|
75
|
-
|
76
|
-
def save_frames(self, out_dir, time_points=None, interval_second=0.1, **kwargs):
|
77
|
-
""" 跟get_frames差不多,多了一步自动存储图片文件到目录里
|
78
|
-
|
79
|
-
这个功能是比较定制化的,不是那么泛用,但会有很多相关类似的操作需求,可以参考这里的代码实现
|
80
|
-
"""
|
81
|
-
# 1 time_points
|
82
|
-
if time_points is None:
|
83
|
-
time_points = np.arange(0, self.duration, interval_second)
|
84
|
-
out_dir = XlPath(out_dir)
|
85
|
-
|
86
|
-
# 2 按时间点取图片,保存文件
|
87
|
-
for time_point, frame in self.get_frames(time_points, **kwargs):
|
88
|
-
tt = int(time_point * 10)
|
89
|
-
m, s, ms = tt // 600, (tt // 10) % 60, tt % 10
|
90
|
-
xlcv.write(frame, out_dir / f'{m:03}_{s:02}.{ms}.jpg')
|
91
|
-
|
92
|
-
def join_subtitles_image(self, time_points, ltrb_pos=None, *,
|
93
|
-
crop_first_frame=False,
|
94
|
-
filter_mode=2):
|
95
|
-
""" 生成字幕拼图
|
96
|
-
|
97
|
-
:param time_points: 在哪些时间点截图
|
98
|
-
:param ltrb_pos: 字幕所在位置,没有输入则默认全部全图拼接,
|
99
|
-
一般指定上下就行了,即 [None, 640, None, 700]
|
100
|
-
:param crop_first_frame:
|
101
|
-
False,保留第一帧的完整性
|
102
|
-
True,第一帧也只裁剪字幕部分
|
103
|
-
:param filter_mode: 对于给出的时间点图片,去除相邻相同的图片
|
104
|
-
:return:
|
105
|
-
|
106
|
-
参考用法:
|
107
|
-
from pyxllib.file.movielib import VideoFileClip
|
108
|
-
clip = VideoFileClip(str(Paths.videos / '觉观01.mp4')) # 必须要转str类型
|
109
|
-
im = clip.join_subtitles_image(np.arange(30, 40, 0.1), [None, 640, None, 695])
|
110
|
-
xlcv.write(im, Paths.videos / 'test.jpg')
|
111
|
-
"""
|
112
|
-
# 1 如果有左右裁剪要提前处理
|
113
|
-
x1, y1, x2, y2 = ltrb_pos
|
114
|
-
if x1 or x2:
|
115
|
-
clip = self.crop(x1=x1, x2=x2)
|
116
|
-
else:
|
117
|
-
clip = self
|
118
|
-
|
119
|
-
# 2 第1帧是否要保留全图
|
120
|
-
frame_list = []
|
121
|
-
if crop_first_frame:
|
122
|
-
head_frame = None
|
123
|
-
else:
|
124
|
-
time_points = tuple(time_points)
|
125
|
-
head_frame = clip.get_frame2(time_points[0])
|
126
|
-
frame_list.append(head_frame[:y2])
|
127
|
-
head_frame = head_frame[y1:y2]
|
128
|
-
time_points = time_points[1:]
|
129
|
-
|
130
|
-
# 3 裁剪字幕区域
|
131
|
-
clip = clip.crop(y1=y1, y2=y2)
|
132
|
-
for time_point, frame in clip.get_frames(time_points, filter_mode=filter_mode, head_frame=head_frame):
|
133
|
-
frame_list.append(frame)
|
134
|
-
|
135
|
-
# 4 拼接完整图
|
136
|
-
im = xlcv.concat(frame_list, pad=0)
|
137
|
-
return im
|
138
|
-
|
139
|
-
|
140
|
-
inject_members(XlVideoFileClip, VideoFileClip)
|
141
|
-
|
142
|
-
|
143
|
-
def clip_video(input_file, output_file, start_time, end_time):
|
144
|
-
start_time = cvsecs(start_time)
|
145
|
-
end_time = cvsecs(end_time)
|
146
|
-
clip = VideoFileClip(input_file).subclip(start_time, end_time)
|
147
|
-
clip.write_videofile(output_file)
|
148
|
-
clip.reader.close()
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2022/02/25 17:54
|
6
|
+
|
7
|
+
from pyxllib.prog.pupil import check_install_package
|
8
|
+
|
9
|
+
check_install_package('moviepy')
|
10
|
+
|
11
|
+
import cv2
|
12
|
+
from moviepy.editor import VideoFileClip
|
13
|
+
from moviepy.editor import cvsecs
|
14
|
+
import numpy as np
|
15
|
+
from tqdm import tqdm
|
16
|
+
|
17
|
+
from pyxllib.prog.pupil import inject_members
|
18
|
+
from pyxllib.file.specialist import XlPath
|
19
|
+
from pyxllib.cv.imhash import get_init_hash, phash
|
20
|
+
from pyxllib.cv.xlcvlib import xlcv
|
21
|
+
|
22
|
+
|
23
|
+
class XlVideoFileClip(VideoFileClip):
|
24
|
+
|
25
|
+
def get_frame2(self, time_point, *, scale=None):
|
26
|
+
""" 官方获得的图片通道是RGB,但是cv2处理的统一规则是BGR,要转换过来
|
27
|
+
|
28
|
+
:param scale: 获取图片后是否要按统一的比例再缩放一下
|
29
|
+
"""
|
30
|
+
frame = self.get_frame(time_point)
|
31
|
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
32
|
+
|
33
|
+
if scale:
|
34
|
+
frame = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
|
35
|
+
|
36
|
+
return frame
|
37
|
+
|
38
|
+
def get_frames(self, time_points=None, *, interval_second=0.1, cur_hash=None, head_frame=None, scale=None,
|
39
|
+
filter_mode=2, print_mode=True):
|
40
|
+
""" 同时获得多帧图片
|
41
|
+
|
42
|
+
:param time_points: 类list对象,元素是时间点(可以是字符串格式,也可以是数值秒数)
|
43
|
+
常见生成方法参考:np.array(0, clip.duration, 0.1)
|
44
|
+
:param filter_mode: 过滤强度
|
45
|
+
因为times如果取的非常密集,比如每0.1秒取一帧,会有很大概率是重复、相近的图
|
46
|
+
此时可以用这个参数控制误差在多大以内的图会保留
|
47
|
+
使用phash判断帧之间相似度,只要误差在该设定阈值内,都会被跳过
|
48
|
+
可以设置0,不过这样过滤程度是最低的
|
49
|
+
也可以输入-1,表示完全不过滤,不计算phash。当这种情况也不太必要用这个函数了,可以直接普通循环。
|
50
|
+
也可以故意设置的特别大(phash最大差距是64),那这样相当于差异很大的也过滤掉了
|
51
|
+
:param head_frame: 提供一张初始图片,主要是配合filter_mode用于去重的
|
52
|
+
:param cur_hash: 类似head_frame作用,但直接提供hash值
|
53
|
+
:return: 使用yield机制,防止图片数据太多,内存爆炸了
|
54
|
+
当filter_mode>=0时,返回 时间点time_point和对应的图片im
|
55
|
+
"""
|
56
|
+
if cur_hash is None:
|
57
|
+
if head_frame is None:
|
58
|
+
cur_hash = get_init_hash()
|
59
|
+
else:
|
60
|
+
cur_hash = phash(head_frame)
|
61
|
+
|
62
|
+
if time_points is None:
|
63
|
+
time_points = np.arange(0, self.duration, interval_second)
|
64
|
+
|
65
|
+
for time_point in tqdm(time_points, disable=not print_mode):
|
66
|
+
im = self.get_frame2(time_point, scale=scale)
|
67
|
+
|
68
|
+
if filter_mode >= 0:
|
69
|
+
last_hash, cur_hash = cur_hash, phash(im)
|
70
|
+
if cur_hash - last_hash <= filter_mode:
|
71
|
+
continue
|
72
|
+
yield time_point, im
|
73
|
+
else:
|
74
|
+
yield im
|
75
|
+
|
76
|
+
def save_frames(self, out_dir, time_points=None, interval_second=0.1, **kwargs):
|
77
|
+
""" 跟get_frames差不多,多了一步自动存储图片文件到目录里
|
78
|
+
|
79
|
+
这个功能是比较定制化的,不是那么泛用,但会有很多相关类似的操作需求,可以参考这里的代码实现
|
80
|
+
"""
|
81
|
+
# 1 time_points
|
82
|
+
if time_points is None:
|
83
|
+
time_points = np.arange(0, self.duration, interval_second)
|
84
|
+
out_dir = XlPath(out_dir)
|
85
|
+
|
86
|
+
# 2 按时间点取图片,保存文件
|
87
|
+
for time_point, frame in self.get_frames(time_points, **kwargs):
|
88
|
+
tt = int(time_point * 10)
|
89
|
+
m, s, ms = tt // 600, (tt // 10) % 60, tt % 10
|
90
|
+
xlcv.write(frame, out_dir / f'{m:03}_{s:02}.{ms}.jpg')
|
91
|
+
|
92
|
+
def join_subtitles_image(self, time_points, ltrb_pos=None, *,
|
93
|
+
crop_first_frame=False,
|
94
|
+
filter_mode=2):
|
95
|
+
""" 生成字幕拼图
|
96
|
+
|
97
|
+
:param time_points: 在哪些时间点截图
|
98
|
+
:param ltrb_pos: 字幕所在位置,没有输入则默认全部全图拼接,
|
99
|
+
一般指定上下就行了,即 [None, 640, None, 700]
|
100
|
+
:param crop_first_frame:
|
101
|
+
False,保留第一帧的完整性
|
102
|
+
True,第一帧也只裁剪字幕部分
|
103
|
+
:param filter_mode: 对于给出的时间点图片,去除相邻相同的图片
|
104
|
+
:return:
|
105
|
+
|
106
|
+
参考用法:
|
107
|
+
from pyxllib.file.movielib import VideoFileClip
|
108
|
+
clip = VideoFileClip(str(Paths.videos / '觉观01.mp4')) # 必须要转str类型
|
109
|
+
im = clip.join_subtitles_image(np.arange(30, 40, 0.1), [None, 640, None, 695])
|
110
|
+
xlcv.write(im, Paths.videos / 'test.jpg')
|
111
|
+
"""
|
112
|
+
# 1 如果有左右裁剪要提前处理
|
113
|
+
x1, y1, x2, y2 = ltrb_pos
|
114
|
+
if x1 or x2:
|
115
|
+
clip = self.crop(x1=x1, x2=x2)
|
116
|
+
else:
|
117
|
+
clip = self
|
118
|
+
|
119
|
+
# 2 第1帧是否要保留全图
|
120
|
+
frame_list = []
|
121
|
+
if crop_first_frame:
|
122
|
+
head_frame = None
|
123
|
+
else:
|
124
|
+
time_points = tuple(time_points)
|
125
|
+
head_frame = clip.get_frame2(time_points[0])
|
126
|
+
frame_list.append(head_frame[:y2])
|
127
|
+
head_frame = head_frame[y1:y2]
|
128
|
+
time_points = time_points[1:]
|
129
|
+
|
130
|
+
# 3 裁剪字幕区域
|
131
|
+
clip = clip.crop(y1=y1, y2=y2)
|
132
|
+
for time_point, frame in clip.get_frames(time_points, filter_mode=filter_mode, head_frame=head_frame):
|
133
|
+
frame_list.append(frame)
|
134
|
+
|
135
|
+
# 4 拼接完整图
|
136
|
+
im = xlcv.concat(frame_list, pad=0)
|
137
|
+
return im
|
138
|
+
|
139
|
+
|
140
|
+
inject_members(XlVideoFileClip, VideoFileClip)
|
141
|
+
|
142
|
+
|
143
|
+
def clip_video(input_file, output_file, start_time, end_time):
|
144
|
+
start_time = cvsecs(start_time)
|
145
|
+
end_time = cvsecs(end_time)
|
146
|
+
clip = VideoFileClip(input_file).subclip(start_time, end_time)
|
147
|
+
clip.write_videofile(output_file)
|
148
|
+
clip.reader.close()
|
pyxllib/file/newbie.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : 陈坤泽
|
4
|
-
# @Email : 877362867@qq.com
|
5
|
-
# @Date : 2021/06/06 10:51
|
6
|
-
|
7
|
-
def linux_path_fmt(p):
|
8
|
-
p = str(p)
|
9
|
-
p = p.replace('\\', '/')
|
10
|
-
return p
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : 陈坤泽
|
4
|
+
# @Email : 877362867@qq.com
|
5
|
+
# @Date : 2021/06/06 10:51
|
6
|
+
|
7
|
+
def linux_path_fmt(p):
|
8
|
+
p = str(p)
|
9
|
+
p = p.replace('\\', '/')
|
10
|
+
return p
|