PyPI - pyDiffTools - Versions diffs - 0.1.6__py3-none-any.whl - Mend

pyDiffTools 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

pydifftools/__init__.py +10 -0
pydifftools/check_numbers.py +55 -0
pydifftools/command_line.py +818 -0
pydifftools/comment_functions.py +39 -0
pydifftools/continuous.py +166 -0
pydifftools/copy_files.py +75 -0
pydifftools/diff-doc.js +193 -0
pydifftools/doc_contents.py +107 -0
pydifftools/html_comments.py +33 -0
pydifftools/html_uncomments.py +524 -0
pydifftools/match_spaces.py +235 -0
pydifftools/onewordify.py +149 -0
pydifftools/onewordify_undo.py +54 -0
pydifftools/outline.py +68 -0
pydifftools/rearrange_tex.py +188 -0
pydifftools/searchacro.py +80 -0
pydifftools/separate_comments.py +77 -0
pydifftools/split_conflict.py +213 -0
pydifftools/unseparate_comments.py +72 -0
pydifftools/wrap_sentences.py +516 -0
pydifftools/xml2xlsx.vbs +33 -0
pydifftools-0.1.6.dist-info/METADATA +117 -0
pydifftools-0.1.6.dist-info/RECORD +27 -0
pydifftools-0.1.6.dist-info/WHEEL +5 -0
pydifftools-0.1.6.dist-info/entry_points.txt +2 -0
pydifftools-0.1.6.dist-info/licenses/LICENSE.md +28 -0
pydifftools-0.1.6.dist-info/top_level.txt +1 -0

pydifftools/comment_functions.py ADDED Viewed

@@ -0,0 +1,39 @@
+def comment_definition(commandname, name, comment_text):
+    return r"\newcommand{\%s}{%s}" % (commandname, comment_text) + "\n"
+def generate_alphabetnumber(x):
+    if x < 26:
+        return chr(ord("a") + x)
+    else:
+        higher_places = x // 26
+        return generate_alphabetnumber(
+            higher_places - 1
+        ) + generate_alphabetnumber(x - higher_places * 26)
+def matchingbrackets(content, startpoint, bracket_type):
+    if bracket_type == "(":
+        opening = "("
+        closing = ")"
+    elif bracket_type == "[":
+        opening = "["
+        closing = "]"
+    elif bracket_type == "{":
+        opening = "{"
+        closing = "}"
+    else:
+        raise ValueError("I didn't understand the type of bracket!")
+    first = (
+        False  # of course, don't want to break until we've found at least one
+    )
+    level = 0
+    for j in range(startpoint, len(content)):
+        if content[j] == opening:
+            if level == 0:
+                first = j
+            level += 1
+        if content[j] == closing:
+            level -= 1
+        if level == 0 and first:
+            return first, j

pydifftools/continuous.py ADDED Viewed

@@ -0,0 +1,166 @@
+"""
+this requires geckodriver to be installed and available
+"""
+import time
+from selenium import webdriver
+import selenium
+import subprocess, sys, os, psutil, re
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+def run_pandoc(filename, html_file):
+    if os.path.exists("MathJax-3.1.2"):
+        has_local_jax = True
+    else:
+        has_local_jax = False
+        print("you don't have a local copy of mathjax.  You could get it with")
+        print(
+            "wget https://github.com/mathjax/MathJax/archive/refs/tags/3.1.2.zip"
+        )
+        print("and then unzip")
+    current_dir = os.getcwd()
+    localfiles = {}
+    for k in ["csl", "bib"]:
+        localfiles[k] = [
+            f for f in os.listdir(current_dir) if f.endswith("." + k)
+        ]
+        if len(localfiles[k]) == 1:
+            localfiles[k] = localfiles[k][0]
+        else:
+            raise ValueError(
+                f"You have more than one (or no) {k} file in this directory!"
+                " Get rid of all but one! of "
+                + "and".join(localfiles[k])
+            )
+    command = [
+        "pandoc",
+        "--bibliography",
+        localfiles["bib"],
+        f"--csl={localfiles['csl']}",
+        "--filter",
+        "pandoc-crossref",
+        "--citeproc",
+        "--mathjax",
+        "--number-sections",
+        "--toc",
+        "-s",
+        "-o",
+        html_file,
+        filename,
+    ]
+    # command = ['pandoc', '-s', '--mathjax', '-o', html_file, filename]
+    print("running:",' '.join(command))
+    subprocess.run(
+        command,
+    )
+    print("running:\n", command)
+    if has_local_jax:
+        # {{{ for slow internet connection, remove remote files
+        with open(html_file, encoding="utf-8") as fp:
+            text = fp.read()
+        patterns = [
+            r"<script.{0,20}?cdn\.jsdeli.{0,20}?mathjax.{0,60}?script>",
+            r"<script.{0,20}?https...polyfill.{0,60}?script>",
+        ]
+        for j in patterns:
+            text = re.sub(j, "", text, flags=re.DOTALL)
+        with open(html_file, "w", encoding="utf-8") as fp:
+            fp.write(text)
+        # }}}
+    return
+class Handler(FileSystemEventHandler):
+    def __init__(self, filename, observer):
+        self.observer = observer
+        self.filename = filename
+        self.html_file = filename.rsplit(".", 1)[0] + ".html"
+        # self.firefox = webbrowser.get('firefox')
+        # self.firefox = webdriver.Firefox() # requires geckodriver
+        self.init_firefox()
+    def init_firefox(self):
+        self.firefox = webdriver.Chrome()  # requires chromium
+        run_pandoc(self.filename, self.html_file)
+        if not os.path.exists(self.html_file):
+            print("html doesn't exist")
+        self.append_autorefresh()
+        # self.firefox.open_new_tab(self.html_file)
+        self.firefox.get("file://" + os.path.abspath(self.html_file))
+    def on_modified(self, event):
+        # print("modification event")
+        if os.path.normpath(
+            os.path.abspath(event.src_path)
+        ) == os.path.normpath(os.path.abspath(self.filename)):
+            # print("about to run pandoc")
+            run_pandoc(self.filename, self.html_file)
+            self.append_autorefresh()
+            try:
+                self.firefox.refresh()
+            except selenium.common.exceptions.WebDriverException:
+                print(
+                    "I'm quitting!! You probably suspended the computer, which"
+                    " seems to freak selenium out.  Just restart"
+                )
+                self.firefox.quit()
+                self.init_firefox()
+            print("and refreshed!")
+        else:
+            # print("saw a change in",os.path.normpath(os.path.abspath(event.src_path)))
+            # print("not",os.path.normpath(os.path.abspath(self.filename)))
+            pass
+    def append_autorefresh(self):
+        # print("about to add scripts")
+        with open(self.html_file, "r", encoding="utf-8") as fp:
+            all_data = fp.read()
+        all_data = all_data.replace(
+            "</head>",
+            """
+    <script id="MathJax-script" async src="MathJax-3.1.2/es5/tex-mml-chtml.js"></script>
+    <script>
+        // When the page is about to be unloaded, save the current scroll position
+        window.addEventListener('beforeunload', function() {
+            sessionStorage.setItem('scrollPosition', window.scrollY);
+        });
+        // When the page has loaded, scroll to the previous scroll position
+        window.addEventListener('load', function() {
+            var scrollPosition = sessionStorage.getItem('scrollPosition');
+            if (scrollPosition) {
+                window.scrollTo(0, scrollPosition);
+                sessionStorage.removeItem('scrollPosition');
+            }
+        });
+    </script>
+</head>
+    """,
+        )
+        with open(self.html_file, "w", encoding="utf-8") as fp:
+            fp.write(all_data)
+        # print("done adding")
+def watch(filename):
+    observer = Observer()
+    event_handler = Handler(filename, observer)
+    observer.schedule(event_handler, path=".", recursive=False)
+    observer.start()
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        observer.stop()
+    observer.join()
+    # print("returning from watch")
+if __name__ == "__main__":
+    filename = sys.argv[1]
+    watch(filename)
+    # Open the HTML file in the default web browser

pydifftools/copy_files.py ADDED Viewed

@@ -0,0 +1,75 @@
+# from https://tex.stackexchange.com/questions/24542/create-list-of-all-external-files-used-by-master-latex-document
+"""Copy figures used by document."""
+import os
+import shutil
+from pathlib import Path, PurePosixPath
+import subprocess
+import sys
+def copy_image_files(ROOT_TEX, project_name, TARGET_DIR, include_suppinfo):
+    all_files = []
+    with open(ROOT_TEX + ".tex", "r") as fp:
+        alltext = fp.read()
+    if r"\RequirePackage{snapshot}" not in alltext:
+        raise RuntimeError(
+            "You haven't called \\RequirePackage{snapshot} in the root tex file.  I can't do my thing without that!"
+        )
+    with open(ROOT_TEX + ".dep", "r") as f:
+        for line in f:
+            if "*{file}" in line:
+                value = line.split("{")[2].split("}")
+                source = value[0]
+                _, e = os.path.splitext(source)
+                if len(e) == 0 and os.path.exists(source + ".tex"):
+                    all_files.append(source + ".tex")
+                    print("found", source + ".tex")
+                elif os.path.exists(source):
+                    all_files.append(source)
+            elif "*{package}" in line:
+                value = line.split("{")[2].split("}")
+                source = value[0]
+                _, e = os.path.splitext(source)
+                if len(e) == 0 and os.path.exists(source + ".sty"):
+                    all_files.append(source + ".sty")
+                    print("found", source + ".sty")
+                elif os.path.exists(source):
+                    all_files.append(source)
+            else:
+                continue
+    os.makedirs(TARGET_DIR, exist_ok=True)
+    if include_suppinfo:
+        all_files.append("suppinfo.pdf")
+        all_files.append("suppinfo.aux")
+    for source in all_files:
+        d, f = os.path.split(source)
+        b, _ = os.path.splitext(source)
+        if b == ROOT_TEX:
+            f = f.replace(ROOT_TEX, "ms")
+        newpath = TARGET_DIR / d / f
+        print("copying", source, PurePosixPath(newpath))
+        if len(d) > 0:
+            print("going to make", newpath.parents[0])
+            os.makedirs(newpath.parents[0], exist_ok=True)
+        shutil.copy(source, PurePosixPath(newpath))
+    shutil.copy(ROOT_TEX + ".tex", os.path.join(TARGET_DIR, "ms.tex"))
+if __name__ == "__main__":
+    copy_image_files()
+    os.chdir(Path.cwd().parent)
+    output_filename = f"{project_name}_forarxiv.tgz"
+    # create tar process
+    tar = subprocess.Popen(
+        ["tar", "cf", "-", TARGET_DIR.name], stdout=subprocess.PIPE
+    )
+    # create gzip process, using tar's stdout as its stdin
+    gzip = subprocess.Popen(
+        ["gzip", "-9"], stdin=tar.stdout, stdout=subprocess.PIPE
+    )
+    # close tar's stdout so it doesn't hang around waiting for input
+    tar.stdout.close()
+    # write gzip's stdout to a file
+    with open(output_filename, "wb") as fp:
+        shutil.copyfileobj(gzip.stdout, fp)
+    gzip.stdout.close()

pydifftools/diff-doc.js ADDED Viewed

@@ -0,0 +1,193 @@
+//
+// TortoiseSVN Diff script for Word Doc files
+//
+// Copyright (C) 2004-2008 the TortoiseSVN team
+// This file is distributed under the same license as TortoiseSVN
+//
+// Last commit by:
+// $Author$
+// $Date$
+// $Rev$
+//
+// Authors:
+// Jared Silva, 2008
+// Davide Orlandi and Hans-Emil Skogh, 2005
+//
+var objArgs,num,sBaseDoc,sNewDoc,sTempDoc,objScript,word,destination; // working variables shared by the whole script
+// Microsoft Office versions for Microsoft Windows OS (compared with parseInt(word.Version) later in the script)
+var vOffice2000 = 9;
+var vOffice2002 = 10;
+var vOffice2003 = 11;
+var vOffice2007 = 12;
+// WdCompareTarget (only wdCompareTargetNew is used, as an argument to Compare)
+var wdCompareTargetSelected = 0;
+var wdCompareTargetCurrent = 1;
+var wdCompareTargetNew = 2;
+// WdViewType (values tested against / assigned to ActiveWindow.View.Type)
+var wdMasterView = 5;
+var wdNormalView = 1;
+var wdOutlineView = 2;
+// WdSaveOptions (only wdDoNotSaveChanges is used, as the argument to Close)
+var wdDoNotSaveChanges = 0;
+var wdPromptToSaveChanges = -2;
+var wdSaveChanges = -1;
+objArgs = WScript.Arguments;
+num = objArgs.length;
+if (num < 2) // both the base and the new document path are required
+{
+   WScript.Echo("Usage: [CScript | WScript] diff-doc.js base.doc new.doc");
+   WScript.Quit(1);
+}
+sBaseDoc = objArgs(0); // first argument: base document
+sNewDoc = objArgs(1); // second argument: new document
+objScript = new ActiveXObject("Scripting.FileSystemObject");
+if ( ! objScript.FileExists(sBaseDoc))
+{
+    WScript.Echo("File " + sBaseDoc + " does not exist.  Cannot compare the documents.");
+    WScript.Quit(1);
+}
+if ( ! objScript.FileExists(sNewDoc))
+{
+    WScript.Echo("File " + sNewDoc + " does not exist.  Cannot compare the documents.");
+    WScript.Quit(1);
+}
+try
+{
+   word = WScript.CreateObject("Word.Application");
+}
+catch(e) // Word could not be started
+{
+	// before giving up, try with OpenOffice
+	try
+	{
+		var OO;
+		OO = WScript.CreateObject("com.sun.star.ServiceManager");
+	}
+	catch(e) // OpenOffice is not available either
+	{
+		WScript.Echo("You must have Microsoft Word or OpenOffice installed to perform this operation.");
+		WScript.Quit(1);
+	}
+	// yes, OO is installed - do the diff with that one instead
+	var objFile = objScript.GetFile(sNewDoc);
+	if ((objFile.Attributes & 1)==1) // bit 1 is the read-only attribute
+	{
+		// reset the readonly attribute
+		objFile.Attributes = objFile.Attributes & (~1);
+	}
+	//Create the DesktopSet
+	var objDesktop = OO.createInstance("com.sun.star.frame.Desktop");
+	var objUriTranslator = OO.createInstance("com.sun.star.uri.ExternalUriReferenceTranslator");
+	//Adjust the paths for OO: forward slashes, ":" -> "|", spaces -> "%20", then a file:/// prefix
+	sBaseDoc = sBaseDoc.replace(/\\/g, "/");
+	sBaseDoc = sBaseDoc.replace(/:/g, "|");
+	sBaseDoc = sBaseDoc.replace(/ /g, "%20");
+	sBaseDoc="file:///" + sBaseDoc;
+	sBaseDoc=objUriTranslator.translateToInternal(sBaseDoc);
+	sNewDoc = sNewDoc.replace(/\\/g, "/");
+	sNewDoc = sNewDoc.replace(/:/g, "|");
+	sNewDoc = sNewDoc.replace(/ /g, "%20");
+	sNewDoc="file:///" + sNewDoc;
+	sNewDoc=objUriTranslator.translateToInternal(sNewDoc);
+	//Open the new document (the base document is handed to .uno:CompareDocuments below)
+	var oPropertyValue = new Array();
+	oPropertyValue[0] = OO.Bridge_GetStruct("com.sun.star.beans.PropertyValue");
+	oPropertyValue[0].Name = "ShowTrackedChanges";
+	oPropertyValue[0].Value = true;
+	var objDocument=objDesktop.loadComponentFromURL(sNewDoc,"_blank", 0, oPropertyValue);
+	//Set the frame
+	var Frame = objDesktop.getCurrentFrame();
+	var dispatcher=OO.CreateInstance("com.sun.star.frame.DispatchHelper");
+	//Execute the comparison: show tracked changes first, then reuse the property slot for the base document URL
+	dispatcher.executeDispatch(Frame, ".uno:ShowTrackedChanges", "", 0, oPropertyValue);
+	oPropertyValue[0].Name = "URL";
+	oPropertyValue[0].Value = sBaseDoc;
+	dispatcher.executeDispatch(Frame, ".uno:CompareDocuments", "", 0, oPropertyValue);
+	WScript.Quit(0); // the OpenOffice path ends here; the Word code below is not reached
+}
+if (parseInt(word.Version) >= vOffice2007) // from Office 2007 on, the two document paths are swapped before comparing
+{
+	sTempDoc = sNewDoc;
+	sNewDoc = sBaseDoc;
+	sBaseDoc = sTempDoc;
+}
+objScript = null;
+word.visible = true;
+// Open the document held in sNewDoc (after the swap above this is the original base document)
+try
+{
+    destination = word.Documents.Open(sNewDoc, true, true);
+}
+catch(e)
+{
+    WScript.Echo("Error opening " + sNewDoc);
+    // Quit
+    WScript.Quit(1);
+}
+// If the Type property returns either wdOutlineView or wdMasterView and the Count property returns zero, the current document is an outline.
+if (((destination.ActiveWindow.View.Type == wdOutlineView) || (destination.ActiveWindow.View.Type == wdMasterView)) && (destination.Subdocuments.Count == 0))
+{
+    // Change the Type property of the current document to normal
+    destination.ActiveWindow.View.Type = wdNormalView;
+}
+// Compare to the document held in sBaseDoc
+if (parseInt(word.Version) <= vOffice2000)
+{
+    // Compare for Office 2000 and earlier
+    try
+    {
+        destination.Compare(sBaseDoc);
+    }
+    catch(e)
+    {
+        WScript.Echo("Error comparing " + sBaseDoc + " and " + sNewDoc);
+        // Quit (unlike the branch below, the opened document is not closed first)
+        WScript.Quit(1);
+    }
+}
+else
+{
+    // Compare for Office XP (2002) and later
+    try
+    {
+        destination.Compare(sBaseDoc, "Comparison", wdCompareTargetNew, true, true);
+    }
+    catch(e)
+    {
+        WScript.Echo("Error comparing " + sBaseDoc + " and " + sNewDoc);
+        // Close the first document and quit
+        destination.Close(wdDoNotSaveChanges);
+        WScript.Quit(1);
+    }
+}
+// Show the comparison result (only needed before Office 2007)
+if (parseInt(word.Version) < vOffice2007)
+{
+	word.ActiveDocument.Windows(1).Visible = 1;
+}
+// Mark the comparison document as saved to prevent the annoying
+// "Save as" dialog from appearing.
+word.ActiveDocument.Saved = 1;
+// Close the first document (Office 2002 and later only)
+if (parseInt(word.Version) >= vOffice2002)
+{
+    destination.Close(wdDoNotSaveChanges);
+}

pydifftools/doc_contents.py ADDED Viewed

@@ -0,0 +1,107 @@
+from collections import OrderedDict
+from fuzzywuzzy import process
+class doc_contents_class(object):
+    prefix = {
+        "section": "",
+        "subsection": "\t",
+        "subsubsection": 2 * "\t",
+        "paragraph": 3 * "\t",
+    }
+    inv_prefix = {v: k for k, v in prefix.items()}
+    def __init__(self):
+        self.contents = OrderedDict()
+        self.contents["header"] = ""
+        self.types = {}
+        self.types["header"] = "header"
+        self._reordering_started = False
+        self._aliases = {}
+    def start_sec(self, thistype, thistitle):
+        assert thistitle not in self.contents.keys(), (
+            "more than one section with the name:\n" + thistitle
+        )
+        self.contents[thistitle] = ""
+        self.types[thistitle] = thistype
+        print("added", thistitle)
+    def __setstate__(self, d):
+        "set the info from a pickle"
+        self.contents = d["contents"]
+        self.types = d["types"]
+        self._aliases = {} # doesn't exist, but still needed
+        self._reordering_started = False
+        return
+    def __getstate__(self):
+        "return info for a pickle"
+        return {
+            "contents": self.contents,
+            "types": self.types,
+        }
+    def __iadd__(self, value):
+        self.contents[next(reversed(self.contents))] += value
+        return self
+    def __str__(self):
+        if len(self._processed_titles) > 0:
+            raise ValueError("the following section"
+            " titles were not utilized -- this program is"
+            " for reordering, not dropping!:\n"+str(self._processed_titles))
+        retval = ""
+        for j in self.contents.keys():
+            if self.types[j] != "header":
+                new_name = j
+                if j in self._aliases.keys():
+                    new_name = self._aliases[j]
+                retval += f"\\{self.types[j]}{{{new_name}}}"
+            retval += f"{self.contents[j]}"
+        return retval
+    @property
+    def outline(self):
+        retval = []
+        for j in self.contents.keys():
+            if self.types[j] != "header":
+                thistitle = (self.prefix[self.types[j]] + "\t").join(j.split("\n"))
+                retval.append(self.prefix[self.types[j]] + "*\t" + thistitle)
+        self._reordering_started = False
+        return "\n".join(retval)
+    def outline_in_order(self, thisline):
+        if not self._reordering_started:
+            self._processed_titles = [j for j in self.contents.keys()
+                    if self.types[j] != 'header']
+            self._reordering_started = True
+        ilevel = 0
+        spacelevel = 0
+        hitmarker = False
+        for j, thischar in enumerate(thisline):
+            if not hitmarker:
+                if thischar == " ":
+                    spacelevel += 1
+                if spacelevel == 4 or thischar == "\t":
+                    ilevel += 1
+                    spacelevel = 0
+                elif thischar == "*":
+                    hitmarker = True
+            else:
+                assert thischar in [" ", "\t"]
+                title = thisline[j + 1 :]
+                break
+        if not hitmarker:
+            raise ValueError("somehow, there wasn't a * marker!")
+        if title not in self.contents.keys():
+            best_match, match_quality = process.extractOne(title, self.contents.keys())
+            yesorno = input(f"didn't find\n\t{title}\nin keys, maybe you want\n\t{best_match}\nsay y or n")
+            if yesorno == 'y':
+                self._aliases[best_match] = title # will be replaced later
+                title = best_match
+            else:
+                raise ValueError("problem with replacement")
+        self.contents.move_to_end(title)
+        self._processed_titles.remove(title)
+        self.types[title] = self.inv_prefix[ilevel * "\t"]

pydifftools/html_comments.py ADDED Viewed

@@ -0,0 +1,33 @@
+# again rerun
+from lxml import html, etree
+import os
+from pyspecdata.fornotebook import *
+from pyspecdata import *
+import re
+fp = open(sys.argv[1], "r")
+content = fp.read()
+fp.close()
+doc = html.fromstring(content)
+commentlabel_re = re.compile(r"\[([A-Z]+)([0-9])\]")
+comment_dict = {}
+# for j in doc.xpath('descendant::*[@style="mso-element:comment"]'):
+newlist = []
+thisbody = doc.find("body")
+print("I found the body", lsafen(thisbody))
+commentlist = etree.Element("div", style="mso-element:comment-list")
+for j in doc.xpath('//span[@style="mso-element:comment"]'):
+    # for j in doc.xpath('//span[@style="mso-element:comment"]'):
+    # print 'found span with style:\n\n',lsafen(html.tostring(j),wrap = 60)
+    # if j.attrib['style'] == 'mso-element:comment':
+    print("found span with style:\n\n", lsafen(j.attrib, wrap=60))
+    newlist.append(j)
+    j.drop_tree()
+    commentlist.append(j)
+thisbody.append(commentlist)
+# print lsafen(map(html.tostring,newlist),wrap = 60)
+newfile = re.sub(r"(.*)(\.htm.*)", r"\1_htmlcomm\2", sys.argv[1])
+fp = open(newfile, "w")
+content = html.tostring(doc)
+fp.write(content)
+fp.close()