PyPI - pyxllib - Versions diffs - 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl - Mend

pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

pyxllib/__init__.py +9 -2
pyxllib/algo/__init__.py +8 -0
pyxllib/algo/disjoint.py +54 -0
pyxllib/algo/geo.py +541 -0
pyxllib/{util/mathlib.py → algo/intervals.py} +172 -36
pyxllib/algo/matcher.py +389 -0
pyxllib/algo/newbie.py +166 -0
pyxllib/algo/pupil.py +629 -0
pyxllib/algo/shapelylib.py +67 -0
pyxllib/algo/specialist.py +241 -0
pyxllib/algo/stat.py +494 -0
pyxllib/algo/treelib.py +149 -0
pyxllib/algo/unitlib.py +66 -0
pyxllib/autogui/__init__.py +5 -0
pyxllib/autogui/activewin.py +246 -0
pyxllib/autogui/all.py +9 -0
pyxllib/autogui/autogui.py +852 -0
pyxllib/autogui/uiautolib.py +362 -0
pyxllib/autogui/virtualkey.py +102 -0
pyxllib/autogui/wechat.py +827 -0
pyxllib/autogui/wechat_msg.py +421 -0
pyxllib/autogui/wxautolib.py +84 -0
pyxllib/cv/__init__.py +1 -11
pyxllib/cv/expert.py +267 -0
pyxllib/cv/{imlib.py → imfile.py} +18 -83
pyxllib/cv/imhash.py +39 -0
pyxllib/cv/pupil.py +9 -0
pyxllib/cv/rgbfmt.py +1525 -0
pyxllib/cv/slidercaptcha.py +137 -0
pyxllib/cv/trackbartools.py +163 -49
pyxllib/cv/xlcvlib.py +1040 -0
pyxllib/cv/xlpillib.py +423 -0
pyxllib/data/__init__.py +0 -0
pyxllib/data/echarts.py +240 -0
pyxllib/data/jsonlib.py +89 -0
pyxllib/{util/oss2_.py → data/oss.py} +11 -9
pyxllib/data/pglib.py +1127 -0
pyxllib/data/sqlite.py +568 -0
pyxllib/{util → data}/sqllib.py +13 -31
pyxllib/ext/JLineViewer.py +505 -0
pyxllib/ext/__init__.py +6 -0
pyxllib/{util → ext}/demolib.py +119 -35
pyxllib/ext/drissionlib.py +277 -0
pyxllib/ext/kq5034lib.py +12 -0
pyxllib/{util/main.py → ext/old.py} +122 -284
pyxllib/ext/qt.py +449 -0
pyxllib/ext/robustprocfile.py +497 -0
pyxllib/ext/seleniumlib.py +76 -0
pyxllib/{util/tklib.py → ext/tk.py} +10 -11
pyxllib/ext/unixlib.py +827 -0
pyxllib/ext/utools.py +351 -0
pyxllib/{util/webhooklib.py → ext/webhook.py} +45 -17
pyxllib/ext/win32lib.py +40 -0
pyxllib/ext/wjxlib.py +88 -0
pyxllib/ext/wpsapi.py +124 -0
pyxllib/ext/xlwork.py +9 -0
pyxllib/ext/yuquelib.py +1105 -0
pyxllib/file/__init__.py +17 -0
pyxllib/file/docxlib.py +761 -0
pyxllib/{util → file}/gitlib.py +40 -27
pyxllib/file/libreoffice.py +165 -0
pyxllib/file/movielib.py +148 -0
pyxllib/file/newbie.py +10 -0
pyxllib/file/onenotelib.py +1469 -0
pyxllib/file/packlib/__init__.py +330 -0
pyxllib/{util → file/packlib}/zipfile.py +598 -195
pyxllib/file/pdflib.py +426 -0
pyxllib/file/pupil.py +185 -0
pyxllib/file/specialist/__init__.py +685 -0
pyxllib/{basic/_5_dirlib.py → file/specialist/dirlib.py} +364 -93
pyxllib/file/specialist/download.py +193 -0
pyxllib/file/specialist/filelib.py +2829 -0
pyxllib/file/xlsxlib.py +3131 -0
pyxllib/file/xlsyncfile.py +341 -0
pyxllib/prog/__init__.py +5 -0
pyxllib/prog/cachetools.py +64 -0
pyxllib/prog/deprecatedlib.py +233 -0
pyxllib/prog/filelock.py +42 -0
pyxllib/prog/ipyexec.py +253 -0
pyxllib/prog/multiprogs.py +940 -0
pyxllib/prog/newbie.py +451 -0
pyxllib/prog/pupil.py +1197 -0
pyxllib/{sitepackages.py → prog/sitepackages.py} +5 -3
pyxllib/prog/specialist/__init__.py +391 -0
pyxllib/prog/specialist/bc.py +203 -0
pyxllib/prog/specialist/browser.py +497 -0
pyxllib/prog/specialist/common.py +347 -0
pyxllib/prog/specialist/datetime.py +199 -0
pyxllib/prog/specialist/tictoc.py +240 -0
pyxllib/prog/specialist/xllog.py +180 -0
pyxllib/prog/xlosenv.py +108 -0
pyxllib/stdlib/__init__.py +17 -0
pyxllib/{util → stdlib}/tablepyxl/__init__.py +1 -3
pyxllib/{util → stdlib}/tablepyxl/style.py +1 -1
pyxllib/{util → stdlib}/tablepyxl/tablepyxl.py +2 -4
pyxllib/text/__init__.py +8 -0
pyxllib/text/ahocorasick.py +39 -0
pyxllib/text/airscript.js +744 -0
pyxllib/text/charclasslib.py +121 -0
pyxllib/text/jiebalib.py +267 -0
pyxllib/text/jinjalib.py +32 -0
pyxllib/text/jsa_ai_prompt.md +271 -0
pyxllib/text/jscode.py +922 -0
pyxllib/text/latex/__init__.py +158 -0
pyxllib/text/levenshtein.py +303 -0
pyxllib/text/nestenv.py +1215 -0
pyxllib/text/newbie.py +300 -0
pyxllib/text/pupil/__init__.py +8 -0
pyxllib/text/pupil/common.py +1121 -0
pyxllib/text/pupil/xlalign.py +326 -0
pyxllib/text/pycode.py +47 -0
pyxllib/text/specialist/__init__.py +8 -0
pyxllib/text/specialist/common.py +112 -0
pyxllib/text/specialist/ptag.py +186 -0
pyxllib/text/spellchecker.py +172 -0
pyxllib/text/templates/echart_base.html +11 -0
pyxllib/text/templates/highlight_code.html +17 -0
pyxllib/text/templates/latex_editor.html +103 -0
pyxllib/text/vbacode.py +17 -0
pyxllib/text/xmllib.py +747 -0
pyxllib/xl.py +39 -0
pyxllib/xlcv.py +17 -0
pyxllib-0.3.197.dist-info/METADATA +48 -0
pyxllib-0.3.197.dist-info/RECORD +126 -0
{pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +4 -5
pyxllib/basic/_1_strlib.py +0 -945
pyxllib/basic/_2_timelib.py +0 -488
pyxllib/basic/_3_pathlib.py +0 -916
pyxllib/basic/_4_loglib.py +0 -419
pyxllib/basic/__init__.py +0 -54
pyxllib/basic/arrow_.py +0 -250
pyxllib/basic/chardet_.py +0 -66
pyxllib/basic/dirlib.py +0 -529
pyxllib/basic/dprint.py +0 -202
pyxllib/basic/extension.py +0 -12
pyxllib/basic/judge.py +0 -31
pyxllib/basic/log.py +0 -204
pyxllib/basic/pathlib_.py +0 -705
pyxllib/basic/pytictoc.py +0 -102
pyxllib/basic/qiniu_.py +0 -61
pyxllib/basic/strlib.py +0 -761
pyxllib/basic/timer.py +0 -132
pyxllib/cv/cv.py +0 -834
pyxllib/cv/cvlib/_1_geo.py +0 -543
pyxllib/cv/cvlib/_2_cvprcs.py +0 -309
pyxllib/cv/cvlib/_2_imgproc.py +0 -594
pyxllib/cv/cvlib/_3_pilprcs.py +0 -80
pyxllib/cv/cvlib/_4_cvimg.py +0 -211
pyxllib/cv/cvlib/__init__.py +0 -10
pyxllib/cv/debugtools.py +0 -82
pyxllib/cv/fitz_.py +0 -300
pyxllib/cv/installer.py +0 -42
pyxllib/debug/_0_installer.py +0 -38
pyxllib/debug/_1_typelib.py +0 -277
pyxllib/debug/_2_chrome.py +0 -198
pyxllib/debug/_3_showdir.py +0 -161
pyxllib/debug/_4_bcompare.py +0 -140
pyxllib/debug/__init__.py +0 -49
pyxllib/debug/bcompare.py +0 -132
pyxllib/debug/chrome.py +0 -198
pyxllib/debug/installer.py +0 -38
pyxllib/debug/showdir.py +0 -158
pyxllib/debug/typelib.py +0 -278
pyxllib/image/__init__.py +0 -12
pyxllib/torch/__init__.py +0 -20
pyxllib/torch/modellib.py +0 -37
pyxllib/torch/trainlib.py +0 -344
pyxllib/util/__init__.py +0 -20
pyxllib/util/aip_.py +0 -141
pyxllib/util/casiadb.py +0 -59
pyxllib/util/excellib.py +0 -495
pyxllib/util/filelib.py +0 -612
pyxllib/util/jsondata.py +0 -27
pyxllib/util/jsondata2.py +0 -92
pyxllib/util/labelmelib.py +0 -139
pyxllib/util/onepy/__init__.py +0 -29
pyxllib/util/onepy/onepy.py +0 -574
pyxllib/util/onepy/onmanager.py +0 -170
pyxllib/util/pyautogui_.py +0 -219
pyxllib/util/textlib.py +0 -1305
pyxllib/util/unorder.py +0 -22
pyxllib/util/xmllib.py +0 -639
pyxllib-0.0.43.dist-info/METADATA +0 -39
pyxllib-0.0.43.dist-info/RECORD +0 -80
pyxllib-0.0.43.dist-info/top_level.txt +0 -1
{pyxllib-0.0.43.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0

pyxllib/file/docxlib.py ADDED Viewed

@@ -0,0 +1,761 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @Author : 陈坤泽
+# @Email  : 877362867@qq.com
+# @Date   : 2021/08/31 09:56
+from pyxllib.prog.pupil import check_install_package
+check_install_package('win32com', 'pywin32')
+check_install_package('docx', 'python-docx')
+import json
+import os
+import re
+import pythoncom
+from win32com.client import constants
+import win32com.client as win32
+import docx
+import docx.table
+import docx.enum
+from pyxllib.prog.pupil import DictTool, inject_members, run_once
+from pyxllib.prog.specialist import get_etag, browser, XlPath
+from pyxllib.text.pupil import strwidth
+def __docx():
+    """ python-docx 相关封装
+    """
+    pass
+class DocxTools:
+    @classmethod
+    def to_pdf(cls, docx_file, pdf_file=None):
+        check_install_package('docx2pdf')  # 安装不成功的时候可以考虑加参数：--user
+        import docx2pdf
+        if pdf_file is None:
+            pdf_file = docx_file.with_suffix('.pdf')
+        docx2pdf.convert(str(docx_file), str(pdf_file))
+        return pdf_file
+    @classmethod
+    def merge(cls, master_file, toc, *, outline='demote'):
+        """ 合并多份docx文件
+        :param master_file: 要合并到哪个主文件
+            注意如果这个文件已存在，会被替换，重置
+        :param toc: 类似fitz的table of contents，用一个n*3的表格表示新文档的格式
+            list，每个元素三列：标题级别，标题名称，(可选)对应文件内容
+        :param outline: 原来每份子文档里的标题，插入到新文档中的处理规则
+            demote：降级
+            clear：清除
+        这个功能还有些局限性，后面要扩展鲁棒性
+        TODO 增加一个支持将原文档标题降级的功能，降到toc之后
+        """
+        app = XlWin32WordApplication.get_app()
+        master_doc = app.new_doc(master_file)
+        for item in toc:
+            lvl, title, file = item
+            # 1 加一个标题
+            r = master_doc.Paragraphs.Add().Range
+            r.InsertBefore(title)
+            r.Style = master_doc.Styles(f'标题 {lvl}')
+            # 2 拷贝完整的内容
+            if file:
+                file = XlPath(file)
+                member_doc = app.open_doc(file)
+                member_doc.Activate()
+                # 处理原来文档的目录级别
+                if outline == 'demote':
+                    member_doc.outline_demote(lvl)
+                elif outline == 'clear':
+                    # 降10级就相当于清除所有原来的标题
+                    member_doc.outline_demote(10)
+                app.Selection.WholeStory()
+                app.Selection.Copy()
+                master_doc.Activate()
+                app.Selection.EndKey(Unit=app.wd('Story'))  # 跳到文档末尾
+                app.Selection.Paste()
+                member_doc.Close()
+        master_doc.save()
+        master_doc.Close(True)
+class Document:
+    """ 这个库写英文文档还不错。但不能做中文，字体会错乱。
+    """
+    def __init__(self, docx_file=None):
+        """
+        Args:
+            docx_file:
+                已有的word文件路径：打开
+                还没创建的word文件路径：在个别功能需要的时候，会自动创建
+                None：在临时文件夹生成一个默认的word文件
+        """
+        if docx_file is None:
+            self.docx_file = XlPath.tempfile('.docx')
+        else:
+            self.docx_file = XlPath(docx_file)
+        if self.docx_file:
+            self.doc = docx.Document(str(docx_file))
+        else:
+            self.doc = docx.Document()
+    def write(self):
+        XlPath(self.docx_file.parent).mkdir(exist_ok=True)
+        self.doc.save(str(self.docx_file))
+    def to_pdf(self, pdf_file=None):
+        self.write()
+        pdf_file = DocxTools.to_pdf(self.docx_file, pdf_file)
+        return pdf_file
+    def to_fitzdoc(self):
+        """ 获得 fitz的pdf文档对象
+        :return: FitzDoc对象
+        """
+        from pyxllib.file.pdflib import FitzDoc
+        pdf_file = self.to_pdf()
+        doc = FitzDoc(pdf_file)
+        return doc
+    def to_images(self, file_fmt='{filestem}_{number}.png', *args, scale=1, **kwargs):
+        doc = self.to_fitzdoc()
+        files = doc.to_images(doc.src_file.parent, file_fmt, *args, scale=scale, **kwargs)
+        return files
+    def browser(self):
+        """ 转pdf，使用浏览器的查看效果
+        """
+        pdf_file = self.to_pdf()
+        browser(pdf_file)
+    def display(self):
+        """ 转图片，使用jupyter环境的查看效果
+        """
+        from pyxllib.cv.expert import PilImg
+        # 转图片，并且裁剪，加边框输出
+        doc = self.to_fitzdoc()
+        for i in range(doc.page_count):
+            page = doc.load_page(i)
+            print('= ' * 10 + f' {page} ' + '= ' * 10)
+            img: PilImg = page.get_pil_image()
+            img.trim(border=5).plot_border().display()
+            del page
+    def to_labelmes(self, dst_dir=None, file_fmt='{filestem}_{number}.png', *, views=(0, 0, 1, 0), scale=1,
+                    advance=False, indent=None):
+        """ 转labelme格式查看
+        本质是把docx转成pdf，利用pdf的解析生成labelme格式的标准框查看
+        :param views: 详见to_labelmes的描述
+            各位依次代表是否显示对应细粒度的标注：blocks、lines、spans、chars
+        :param bool|dict advance: 是否开启“高级”功能，开启后能获得下划线等属性，但速度会慢很多
+            源生的fitz pdf解析是处理不了下划线的，开启高级功能后，有办法通过特殊手段实现下划线的解析
+            默认会修正目前已知的下划线、颜色偏差问题
+            dict类型：以后功能多了，可能要支持自定义搭配，比如只复原unberline，但不管颜色偏差
+        """
+        from pyxlpr.data.labelme import LabelmeDict
+        # 1 转成图片，及json标注
+        doc = self.to_fitzdoc()
+        imfiles = doc.to_images(dst_dir, file_fmt, scale=scale)
+        # 2 高级功能
+        def is_color(x):
+            return x and sum(x)
+        def to_labelmes_advance():
+            m = 50  # 匹配run时，上文关联的文字长度，越长越严格
+            # 1 将带有下划线的run对象，使用特殊的hash规则存储起来
+            content = []  # 使用已遍历到的文本内容作为hash值
+            elements = {}
+            for p in self.paragraphs:
+                for r in p.runs:
+                    # 要去掉空格，不然可能对不上。试过strip不行。分段会不太一样，还是把所有空格删了靠谱。
+                    content.append(re.sub(r'\s+', '', r.text))
+                    if r.underline or is_color(r.font.color.rgb):  # 对有下划线、颜色的对象进行存储
+                        # print(r.text + ',', r.underline, r.font.color.rgb, ''.join(content))
+                        etag = get_etag(''.join(content)[-m:])  # 全部字符都判断的话太严格了，所以减小区间~
+                        elements[etag] = r
+            # 2 检查json标注中为span的，其哈希规则是否有对应，则要单独设置扩展属性
+            content = ''
+            for i, file in enumerate(imfiles):
+                page = doc.load_page(i)
+                lmdict = LabelmeDict.gen_data(file)
+                lmdict['shapes'] = page.get_labelme_shapes('dict', views=views, scale=scale)
+                for sp in lmdict['shapes']:
+                    attrs = DictTool.json_loads(sp['label'], 'label')
+                    if attrs['category_name'] == 'span':
+                        content += re.sub(r'\s+', '', attrs['text'])
+                        etag = get_etag(content[-m:])
+                        # print(content)
+                        if etag in elements:
+                            # print(content)
+                            r = elements[etag]  # 对应的原run对象
+                            attrs = DictTool.json_loads(sp['label'])
+                            x = r.underline
+                            if x:
+                                attrs['underline'] = int(x)
+                            x = r.font.color.rgb
+                            if is_color(x):
+                                attrs['color'] = list(x)
+                            sp['label'] = json.dumps(attrs)
+                file.with_suffix('.json').write(lmdict, indent=indent)
+        # 3 获得json
+        if advance:
+            to_labelmes_advance()
+        else:
+            doc.to_labelmes(imfiles, views=views, scale=scale)
+    def __getattr__(self, item):
+        # 属性：
+        # core_properties
+        # element
+        # inline_shapes
+        # paragraphs
+        # part
+        # sections
+        # settings
+        # styles
+        # tables
+        # 方法：
+        # add_heading
+        # add_page_break
+        # add_paragraph
+        # add_picture
+        # add_section
+        # add_table
+        # save
+        return getattr(self.doc, item)
+class XlDocxTable(docx.table.Table):
+    def merge_samevalue_in_col(self, col, start_row=1):
+        """ 定义合并单元格的函数
+        :param col: 需要处理数据的列，0开始编号
+        :param start_row: 起始行，即表格中开始比对数据的行（其实标题排不排除一般无所谓~默认是排除了）
+        """
+        def merge_cells(start, end):
+            if end > start:
+                c = self.cell(start, col)
+                c.merge(self.cell(end, col))
+                c.text = self.cell(start, col).text.strip()
+                c.vertical_alignment = docx.enum.table.WD_ALIGN_VERTICAL.CENTER
+                c.paragraphs[0].alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
+        ref, start = None, start_row
+        for i in range(start_row, len(self.rows)):
+            v = self.cell(i, col).text
+            if v != ref:
+                merge_cells(start, i - 1)
+                ref, start = v, i
+            else:
+                self.cell(i, col).text = ''
+        merge_cells(start, i)
+inject_members(XlDocxTable, docx.table.Table)
+def __win32_word():
+    """ 使用win32com调用word
+    vba的文档：示例代码更多，vba语法也更熟悉，但显示的功能更不全
+        https://docs.microsoft.com/en-us/office/vba/api/word.saveas2
+    .net的文档：功能显示更全，应该是所有COM接口都有但示例代码更少、更不熟系
+        https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.interop.word.documentclass.saveas2?view=word-pia
+    """
+    pass
+@run_once
+def inject_win32word(app, recursion_inject=False):
+    """ 给win32的word com接口添加功能
+    :param app: win32的类是临时生成的，需要给一个参考对象，才方便type(word)算出类型
+    :param recursion_inject: 是否递归，对目前有的各种子类扩展功能都绑定上
+        默认关闭，如果影响到性能，可以关闭，后面运行中需要时手动设定inject注入
+        开启，能方便业务层开发
+        之前有想过可以生成doc里的时候再inject这些对象，但如果是批量处理脚本，每次建立doc都判断我觉得也麻烦
+        长痛不如短痛，建立app的时候就把所有对象inject更方便
+    """
+    inject_members(XlWin32WordApplication, type(app), ignore_case=True)
+    if recursion_inject:
+        # 建一个临时文件，把各种需要绑定的对象都生成绑定一遍
+        # 确保初始化稍微慢点，但后面就方便了
+        doc = app.Documents.Add()
+        inject_members(XlWin32WordDocument, type(doc), ignore_case=True)
+        doc.Activate()
+        rng = doc.Range()  # 全空的文档，有区间[0,1)
+        inject_members(XlWin32WordRange, type(rng), ignore_case=True)
+        doc.Hyperlinks.Add(rng, 'url')  # 因为全空，这里会自动生成对应的明文url
+        inject_members(XlWin32WordHyperlink, type(doc.Hyperlinks(1)), ignore_case=True)
+        # 处理完关闭文档，不用保存
+        doc.Close(False)
+class XlWin32WordApplication:
+    @classmethod
+    def get_app(cls, app=None, *, visible=None, display_alerts=0, recursion_inject=True):
+        """
+        Args:
+            app: 可以自定义在外部使用Dispatch、DispatchEx等形式给入已初始化好的app
+            visible: 是否可见
+            display_alerts: 是否关闭警告
+            recursion_inject: 是否递归执行inject
+        """
+        # 1 get app
+        name = 'WORD.APPLICATION'
+        if app is None:
+            try:
+                app = win32.GetActiveObject(name)
+            except pythoncom.com_error:
+                pass
+        if app is None:
+            try:
+                # 名称用大写，会比较兼容旧的word2013等版本
+                # 尽量静态调度，才能获得 from win32com.client import constants 的常量
+                app = win32.gencache.EnsureDispatch(name)
+            except TypeError:
+                # 实在不行，就用动态调度
+                app = win32.dynamic.Dispatch(name)
+            # 注：好像docx的默认打开程序也会有影响，最好默认都是office，不要被改成wps
+        # 2 inject
+        inject_win32word(app, recursion_inject=recursion_inject)
+        if visible is not None:
+            app.Visible = visible
+        if display_alerts is not None:
+            app.DisplayAlerts = display_alerts  # 不警告
+        return app
+    def check_close(self, outfile):
+        """ 检查是否有指定名称的文件被打开，将其关闭，避免new_doc等操作出现问题
+        """
+        outfile = XlPath(outfile)
+        for x in self.Documents:
+            # 有可能文件来自onedrive，这里用safe_init更合理
+            if XlPath.init(x.Name, x.Path) == outfile:
+                x.Close()
+    def open_doc(self, file_name):
+        """ 打开已有的文件
+        原傻逼底层接口，默认都是读取用户目录下的，我给改成默认打开当前工作目录的文件
+        后文的save接口也是同理
+        """
+        doc = self.Documents.Open(str(XlPath.init(file_name, os.getcwd())))
+        return doc
+    def new_doc(self, file=None):
+        """ 创建一个新的文件
+        Args:
+            file: 文件路径
+                空：新建一个doc，到时候保存会默认到临时文件夹
+                不存在的文件名：新建对应的空文件
+                已存在的文件名：重置、覆盖一个新的空文件
+        使用该函数，会自动执行XlWin32WordDocument扩展。
+        """
+        if file is None:
+            file = XlPath.tempfile('.docx')
+        else:
+            file = XlPath(file)
+        doc = self.Documents.Add()  # 创建新的word文档
+        doc.save(file)
+        return doc
+    @classmethod
+    def wd(cls, name, part=None):
+        """ 输入字符串名称，获得对应的常量值
+        :param name: 必须省略前缀wd。这个函数叫wd，就是帮忙省略掉前缀wd的意思
+        :param part: 特定组别的枚举值，可以输入特殊的name模式来取值
+        """
+        if part is None:
+            return getattr(constants, 'wd' + name)
+        else:
+            raise ValueError
+class XlWin32WordDocument:
+    def save(self, file_name=None, fmt=None, retain=False, **kwargs):
+        """ 我自己简化的保存接口
+        :param file_name: 保存到指定路径，如果带有后缀
+        :param fmt: 毕竟是底层的com接口，不能做的太智能吧。连通过文件名后缀自动选择格式的功能都没有，要手动指定。
+            为了方便，对这些进行智能自动处理，得到一个合理的save接口。
+        :param retain: SaveAs2的机制：如果目标格式仍是word支持的，则doc会切换到目标文件。否则doc保留原文件对象。
+            这里retain若打开，则会自动做切换，保留原文件对象
+        :return: 跟retain有关，可能会"重置", outfile
+            默认返回 outfile
+            开启retain时，返回 outfile, doc
+        """
+        # 1 辅助函数
+        def save_format(fmt):
+            """ 枚举值映射，word保存类型的枚举
+            >> _('.html')
+            8
+            >> _('Pdf')
+            17
+            >> _('wdFormatFilteredHTML')
+            10
+            """
+            # 复杂格式可能无法完美支持所有功能。比如复杂的pdf无法使用SaveAs2实现，要用ExportAsFixedFormat。
+            # 有些情况可能无法使用gencache，导致没有constants，所以默认可以直接映射到整数，避免使用到constants
+            common = {'doc': 0,
+                      'html': 8,
+                      'htm': 10,  # 筛选过的html，一般文件会小的多
+                      'txt': 2,
+                      'docx': 16,
+                      'pdf': 17}
+            name = common.get(fmt.lower().lstrip('.'), fmt)
+            if isinstance(name, int):
+                return name
+            else:
+                return getattr(constants, 'wd' + name)
+        # 2 确认要存储的文件格式
+        if isinstance(fmt, str):
+            fmt = fmt.lower().lstrip('.')
+        elif file_name is not None:
+            fmt = XlPath(file_name).suffix[1:].lower()
+        elif self.Path:
+            fmt = os.path.splitext(self.Name)[1][1:].lower()
+        else:
+            fmt = 'docx'
+        # 3 保存一份原始的文件路径
+        origin_file = XlPath.init(self.Name, self.Path) if self.Path else None
+        # 4 如果有指定保存文件路径
+        if file_name is not None:
+            outfile = XlPath.init(file_name, os.getcwd())
+            if outfile.suffix[1:].lower() != fmt:
+                # 已有文件名，但这里指定的fmt不同于原文件，则认为是要另存为一个同名的不同格式文件
+                outfile = XlPath(outfile.stem, outfile.parent, suffix=fmt)
+            self.SaveAs2(str(outfile), save_format(fmt), **kwargs)
+        # 5 如果没指定保存文件路径
+        else:
+            if self.Path:
+                outfile = XlPath.init(self.Name, self.Path, suffix='.' + fmt)
+                self.SaveAs2(str(outfile), save_format(outfile.suffix), **kwargs)
+            else:
+                etag = get_etag(self.Content)
+                outfile = XlPath.init(etag, XlPath.tempdir(), suffix=fmt)
+                self.SaveAs2(str(outfile), save_format(fmt), **kwargs)
+        # 6 是否恢复原doc
+        cur_file = XlPath.init(self.Name, self.Path)  # 当前文件不一定是目标文件f，如果是pdf等格式也不会切换过去
+        if retain and origin_file and origin_file != cur_file:
+            app = self.Application
+            self.Close()
+            self = app.open_doc(origin_file)
+        # 7 返回值
+        if retain:
+            return outfile, self
+        else:
+            return outfile
+    # 先暂时不开启 doc.chars
+    # @staticmethod
+    # def chars(doc):
+    #     return doc.Range().chars
+    @property
+    def n_page(self):
+        return self.ComputeStatistics(2)
+        # return self.ActiveWindow.Panes(1).Pages.Count  # 这样计算也行
+    def browser(self, file_name=None, fmt='html', retain=False):
+        """ 这个函数可能会导致原doc指向对象被销毁，建议要不追返回值doc继续使用
+        """
+        res = self.save(file_name, fmt, retain=retain)
+        if retain:
+            outfile, self = res
+        else:
+            outfile = res
+        browser(outfile)
+        return self
+    def add_section_size(self, factor=1):
+        """ 显示每节长度的标记
+        一般在这里计算比在html计算方便
+        """
+        from humanfriendly import format_size
+        n = self.Paragraphs.Count
+        style_names, text_lens = [], []
+        for p in self.Paragraphs:
+            style_names.append(str(p.Style))
+            text_lens.append(strwidth(p.Range.Text))
+        for i, p in enumerate(self.Paragraphs):
+            name = style_names[i]
+            if name.startswith('标题'):
+                cumulate_size = 0
+                for j in range(i + 1, n):
+                    if style_names[j] != name:
+                        cumulate_size += text_lens[j - 1]
+                    else:
+                        break
+                if cumulate_size:
+                    size = format_size(cumulate_size * factor).replace(' ', '').replace('bytes', 'B')
+                    r = p.Range
+                    self.Range(r.Start, r.End - 1).InsertAfter(f'，{size}')
+    def outline_demote(self, demote_level):
+        """ 标题降级，降低level层 """
+        for p in self.Paragraphs:
+            p.Range.demote(demote_level)
+    def set_style(self, obj, name):
+        """ 给Paragraph、Range等按名称设置样式
+        :param obj: 当前doc下某个含有Style成员属性的子对象
+        :param name: 样式名称
+        """
+        setattr(obj, 'Style', self.Styles(name))
+    def add_paragraph(self, text='', style=None):
+        """ 自定义的插入段落
+        默认的插入比较麻烦，新建段落、插入文本、设置格式要多步实现，这里封装支持在一步进行多种操作
+        """
+        p = self.Paragraphs.Add()
+        if text:
+            p.Range.InsertBefore(text)
+        if style:
+            p.Style = self.Styles(style)
+        return p
+class XlWin32WordRange:
+    """ range是以下标0开始，左闭右开的区间
+    当一个区间出现混合属性，比如有的有加粗，有的没加粗时，标记值为 app.wd('Undefined') 9999999
+    vba的True是值-1，False是值0
+    """
+    def set_hyperlink(self, url):
+        """ 给当前rng添加超链接
+        """
+        doc = self.Parent
+        doc.Hyperlinks.Add(self, url)
+    @property
+    def chars(self):
+        """
+        注意，获得doc全部文本的其他常见方法
+            doc.Range().Text
+        """
+        # 有特殊换行，ch.Text可能会得到 '\r\x07'，为了位置对应，只记录一个字符
+        return ''.join([ch.Text[0] for ch in self.Characters])
+    def char_range(self, start=0, end=None):
+        """ 定位rng中的子range对象，这里是以可见字符Characters计数的
+        :param start: 下标类似切片的规则
+        :param end: 见start描述，允许越界，允许负数
+            默认不输入表示匹配到末尾
+        """
+        n = self.Characters.Count
+        if end is None or end > n:
+            end = n
+        elif end < 0:
+            end = n + end
+        start_idx, end_idx = self.Characters(start + 1).Start, self.Characters(end).End
+        return self.Document.Range(start_idx, end_idx)
+    def shifting(self, left=0, right=0):
+        """ range左右两边增加偏移量，返回重定位的rng
+        常用语段落定位，要在段落末尾增加内容时
+        >> rng2 = p.Range.shifting(right=-1)
+        """
+        return self.Document.Range(self.Start + left, self.End + right)
+    def demote(self, demote_level):
+        """ 标题降级，降低level层 """
+        name = self.Style.NameLocal  # 获得样式名称
+        m = re.match(r'标题 (\d)$', name)
+        if m:
+            lvl = int(m.group(1))
+            new_lvl = lvl + demote_level
+            new_style = f'标题 {new_lvl}' if new_lvl < 10 else '正文'
+            self.Style = self.Parent.Styles(new_style)
+    def set_font(self, font_fmt):
+        """ 设置字体各种格式
+        :param dict|str font_fmt:
+            dict，定制格式
+                布尔类型：Bold、Italic、Subscript、Superscript
+                可布尔的值类型：Underline
+                    支持的格式见：https://docs.microsoft.com/en-us/office/vba/api/word.wdunderline
+                值类型：Name、Size、Color、UnderlineColor
+            str，使用现有样式名
+        """
+        if isinstance(font_fmt, dict):
+            font = self.Font
+            for k, v in font_fmt.items():
+                setattr(font, k, v)
+        elif isinstance(font_fmt, str):
+            self.Style = self.Parent.Styles(font_fmt)
+        else:
+            raise ValueError
+    def insert_before(self, text, font_fmt=None):
+        """ 对原InsertBefore的功能封装
+        :return: 增加返回值，是新插入内容的range定位
+        """
+        start1, end1 = self.Start, self.End
+        self.InsertBefore(text)
+        bias = self.End - end1  # 新插入的内容大小
+        new_rng = self.Document.Range(start1, start1 + bias)
+        if font_fmt:
+            new_rng.set_font(font_fmt)
+        return new_rng
+    def insert_after(self, text, font_fmt=None):
+        """ 同insert_before，是InsertAfter的重封装
+        """
+        # 1
+        start1, end1 = self.Start, self.End
+        # 2 往后插入，会排除\r情况
+        doc = self.Document
+        ch = doc.Range(end1 - 1, end1).Text
+        if ch == '\r':
+            end1 -= 1
+            self = doc.Range(start1, end1)
+        # 3
+        self.InsertAfter(text)
+        bias = self.End - end1
+        new_rng = self.Document.Range(end1, end1 + bias)
+        if font_fmt:
+            new_rng.set_font(font_fmt)
+        return new_rng
+class XlWin32WordHyperlink:
+    @property
+    def netloc(self):
+        from urllib.parse import urlparse
+        linkp = urlparse(self.Name)  # 链接格式解析
+        # netloc = linkp.netloc or Path(linkp.path).name
+        netloc = linkp.netloc or linkp.scheme  # 可能是本地文件，此时记录其所在磁盘
+        return netloc
+    @property
+    def name(self):
+        """ 这个是转成明文的完整链接，如果要编码过的，可以取link.Name """
+        from urllib.parse import unquote
+        return unquote(self.Name)
+def rebuild_document_by_word(fmt='html', translate=False, navigation=False, visible=False, quit=None):
+    """ 将剪切板的内容粘贴到word重新排版，再转成fmt格式的文档，用浏览器打开
+    这个功能只能在windows平台使用，并且必须要安装有Word软件。
+    一般用于英文网站，生成双语阅读的模板，再调用谷歌翻译。
+    生成的文档如果有需要，一般是手动复制整理到另一个docx文件中。
+    Args:
+        fmt: 输出文件类型
+            常见的可以简写：html、pdf、txt
+            其他特殊需求可以用word原变量名：wdFormatDocument
+        visible: 是否展示运行过程，如果不展示，默认最后会close文档
+        quit: 运行完是否退出应用
+        translate: html专用业务功能，表示是否对p拷贝一份notranslate的对象，用于谷歌翻译双语对照
+        navigation: 是否增加导航栏
+            注意，使用导航栏后，页面就无法直接使用谷歌翻译了
+            但可以自己进入_content文件，使用谷歌翻译处理，自覆盖保存
+            然后再回到_index文件，刷新即可
+    """
+    import pyperclip
+    from pyxllib.text.xmllib import BeautifulSoup, html_bitran_template, MakeHtmlNavigation
+    # 1 保存的临时文件名采用etag
+    f = XlPath.init(get_etag(pyperclip.paste()), XlPath.tempdir(), suffix=fmt)
+    app = XlWin32WordApplication.get_app(visible=visible)
+    app.check_close(f)
+    doc = app.new_doc(f)
+    doc.Activate()
+    app.Selection.Paste()
+    # 2 如果需要，也可以在这个阶段，插入word自动化的操作，而不是后续在html层面操作
+    # 统计每节内容长度，每个字母1B，每个汉字2B
+    doc.add_section_size()
+    file = doc.save(f, fmt)
+    doc.Close()
+    # 3 html格式扩展功能
+    if fmt == 'html':
+        # 3.1 默认扩展功能
+        s = file.read_text(encoding='gbk')
+        # s = s.replace('\xa0', '')  # 不知道这样去除\xa0行不行，等下次遇到尝试
+        bs = BeautifulSoup(s, 'lxml')
+        bs.head_add_number()  # 给标题加上编号
+        # bs.head_add_size()  # 显示每节内容长短
+        content = str(bs)
+        # TODO 识别微信、pydoc，然后做一些自动化清理？
+        # TODO 过度缩进问题？
+        # 3.2 双语对照阅读
+        if translate:
+            # word生成的html固定是gbk编码
+            content = html_bitran_template(content)
+        # 原文是gbk，但到谷歌默认是utf8，所以改一改
+        # 这样改后问题是word可能又反而有问题了，不过word本来只是跳板，并不是要用word编辑html
+        file.write_text(content, encoding='utf8')
+        # 3.3 导航栏功能
+        # 作为临时使用可以开，如果要复制到word，并没有必要
+        if navigation:
+            file = MakeHtmlNavigation.from_file(file, encoding='utf8', number=False, text_catalogue=False)
+    if quit:
+        app.Quit()
+    return file

pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl

pyxllib 0.0.43__py3-none-any.whl → 0.3.197__py3-none-any.whl