pyDiffTools-0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,319 @@
+ #!/usr/bin/env python3
+ """this script has been entirely vibe-coded based on the tex example included
+ in the repo!"""
+
+ import os
+ import re
+ import sys
+ import subprocess
+ import tempfile
+ import shutil
+ from pathlib import Path
+
+ from pydifftools.command_registry import register_command
+
+
+ def find_matching(text: str, start: int, open_ch: str, close_ch: str) -> int:
+     """Return index of matching close_ch for open_ch at *start* or -1."""
+     depth = 1
+     i = start + 1
+     while i < len(text):
+         c = text[i]
+         if c == open_ch:
+             depth += 1
+         elif c == close_ch:
+             depth -= 1
+             if depth == 0:
+                 return i
+         i += 1
+     return -1
+
+
+ def preprocess_latex(src: str) -> str:
+     """Convert custom environments and observation macros before pandoc."""
+
+     def repl_python(m: re.Match) -> str:
+         """Preserve python blocks exactly using markers."""
+         code = m.group(1)
+         return (
+             "\\begin{verbatim}\n%%PYTHON_START%%\n"
+             + code
+             + "%%PYTHON_END%%\n\\end{verbatim}"
+         )
+
+     def repl_verbatim(m: re.Match) -> str:
+         """Mark generic verbatim blocks for fenced conversion."""
+         newline = m.group(1)
+         body = m.group(2)
+         if "%%PYTHON_START%%" in body:
+             return m.group(0)
+         return (
+             f"\\begin{{verbatim}}{newline}%%VERBATIM_START%%\n"
+             + body
+             + "%%VERBATIM_END%%\n\\end{verbatim}"
+         )
+
+     # replace python environment with verbatim + markers without touching
+     # the whitespace contained in the block
+     src = re.sub(
+         r"\\begin{python}(?:\[[^\]]*\])?\n(.*?)\\end{python}",
+         repl_python,
+         src,
+         flags=re.S,
+     )
+
+     # mark standard verbatim blocks so they convert to fenced code later
+     src = re.sub(
+         r"\\begin{verbatim}(\n?)(.*?)\\end{verbatim}",
+         repl_verbatim,
+         src,
+         flags=re.S,
+     )
+
+     # convert err environment so pandoc will parse inside while preserving
+     # the whitespace exactly
+     src = re.sub(
+         r"\\begin{err}\n?(.*?)\\end{err}",
+         lambda m: f"<err>{m.group(1)}</err>",
+         src,
+         flags=re.S,
+     )
+
+     # handle \o[...]{} and \o{} observations
+     out = []
+     i = 0
+     while True:
+         idx_bracket = src.find("\\o[", i)
+         idx_brace = src.find("\\o{", i)
+         idxs = [x for x in (idx_bracket, idx_brace) if x != -1]
+         idx = min(idxs) if idxs else -1
+         if idx == -1:
+             out.append(src[i:])
+             break
+         out.append(src[i:idx])
+         j = idx + 2
+         attrs = ""
+         if j < len(src) and src[j] == "[":
+             end_attrs = find_matching(src, j, "[", "]")
+             if end_attrs == -1:
+                 out.append(src[idx:])
+                 break
+             attrs = src[j + 1 : end_attrs]
+             j = end_attrs + 1
+         if j >= len(src) or src[j] != "{":
+             out.append(src[idx : idx + 2])
+             i = idx + 2
+             continue
+         end_body = find_matching(src, j, "{", "}")
+         if end_body == -1:
+             out.append(src[idx:])
+             break
+         body = src[j + 1 : end_body]
+         j = end_body + 1
+         if attrs:
+             m = re.match(r"(.*?)\s*(\(([^)]+)\))?$", attrs.strip())
+             time = m.group(1).strip() if m else attrs.strip()
+             author = m.group(3) if m else None
+             tag = (
+                 f'<obs time="{time}"'
+                 + (f' author="{author}"' if author else "")
+                 + f">{body}</obs>"
+             )
+         else:
+             tag = f"<obs>{body}</obs>"
+         out.append(tag)
+         i = j
+     return "".join(out)
+
+
+ def clean_html_escapes(text: str) -> str:
+     return text.replace("\\<", "<").replace("\\>", ">").replace('\\"', '"')
+
+
+ def finalize_markers(text: str) -> str:
+     lines = []
+     in_py = False
+     need_reset = False
+     in_verb = False
+     for line in text.splitlines(keepends=True):
+         if re.match(r"^\s*%%PYTHON_START%%", line):
+             lines.append("```{python}\n")
+             in_py = True
+             need_reset = True
+             continue
+         if re.match(r"^\s*%%PYTHON_END%%", line):
+             lines.append("```\n")
+             in_py = False
+             continue
+         if re.match(r"^\s*%%VERBATIM_START%%", line):
+             lines.append("```\n")
+             in_verb = True
+             continue
+         if re.match(r"^\s*%%VERBATIM_END%%", line):
+             lines.append("```\n")
+             in_verb = False
+             continue
+         if in_py:
+             stripped = line[4:] if line.startswith("    ") else line
+             if need_reset:
+                 if stripped.lstrip().startswith("%reset"):
+                     lines.append(stripped)
+                 else:
+                     lines.append("%reset -f\n")
+                     lines.append(stripped)
+                 need_reset = False
+             else:
+                 lines.append(stripped)
+         elif in_verb and line.startswith("    "):
+             lines.append(line[4:])
+         else:
+             lines.append(line)
+     return "".join(lines)
+
+
+ def format_observations(text: str) -> str:
+     """Ensure observation tags sit on a single line without altering
+     content."""
+
+     obs_re = re.compile(r"(<obs[^>]*>)(.*?)(</obs>)", flags=re.S)
+
+     def repl(match: re.Match) -> str:
+         open_tag = match.group(1).strip()
+         body = match.group(2)
+         close_tag = match.group(3).strip()
+         # trim newlines that may surround the body but keep internal whitespace
+         body = body.lstrip("\n").rstrip("\n")
+         return f"{open_tag}{body}{close_tag}"
+
+     return obs_re.sub(repl, text)
+
+
+ def format_tags(text: str, indent_str: str = " ") -> str:
+     """Format <err> blocks with indentation and tidy <obs> tags."""
+     text = format_observations(text)
+     # normalize whitespace around err tags
+     text = re.sub(r"<err>[ \t]*\n+", "<err>\n", text)
+     text = re.sub(r"<err>[ \t]+", "<err>\n", text)
+     text = re.sub(r"</err>[ \t]+", "</err>", text)
+     # ensure opening obs tags start on a new line without collapsing blank
+     # lines
+     text = re.sub(r"(\n+)[ \t]*(<obs)", r"\1\2", text)
+     text = re.sub(r"(?<!^)(?<!\n)(<obs)", r"\n\1", text)
+     # ensure a newline after closing obs tags but keep extra blank lines
+     text = re.sub(r"</obs>[ \t]+", "</obs>", text)
+     text = re.sub(r"</obs>(?!\n)", "</obs>\n", text)
+     pattern = re.compile(r"(<err>|</err>)")
+     parts = pattern.split(text)
+     out = []
+     indent = 0
+     prev_tag = None
+     for part in parts:
+         if not part:
+             continue
+         if part == "<err>":
+             if out and not out[-1].endswith("\n"):
+                 out[-1] = out[-1].rstrip() + "\n"
+             out.append(indent_str * indent + "<err>\n")
+             indent += 1
+             prev_tag = "<err>"
+         elif part == "</err>":
+             if out and not out[-1].endswith("\n"):
+                 out[-1] = out[-1].rstrip() + "\n"
+             indent -= 1
+             out.append(indent_str * indent + "</err>\n")
+             prev_tag = "</err>"
+         else:
+             # Keep err contents tight while forcing a blank line after
+             # closing tags so pandoc treats the debug block as a standalone
+             # HTML block. The newline handling below ensures content after
+             # a closing </err> tag always starts on its own paragraph line.
+             if prev_tag == "<err>" and part.startswith("\n"):
+                 part = part[1:]
+             if prev_tag == "</err>" and not part.startswith("\n"):
+                 part = "\n" + part
+             lines = part.splitlines(True)
+             for line in lines:
+                 if line.strip():
+                     out.append(indent_str * indent + line)
+                 else:
+                     out.append(line)
+             prev_tag = None
+     formatted = "".join(out)
+     return re.sub(r"[ \t]+(?=\n)", "", formatted)
+
+
+ @register_command(
+     "Convert LaTeX sources to Quarto Markdown (.qmd) files",
+     help={"tex": "Input .tex file to convert"},
+ )
+ def tex2qmd(tex):
+     """Convert ``tex`` to a .qmd file and return the output path."""
+
+     inp = Path(tex)
+     if not inp.exists():
+         print(f"File not found: {inp}", file=sys.stderr)
+         sys.exit(1)
+
+     base = inp.with_suffix("")
+     src = inp.read_text()
+     pre_content = preprocess_latex(src)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".tex") as pre:
+         pre.write(pre_content.encode())
+         pre_path = pre.name
+
+     mid_fd, mid_path = tempfile.mkstemp()
+     os.close(mid_fd)  # close the descriptor; we only want the name
+     Path(mid_path).unlink()  # we just want the name; pandoc will create it
+
+     try:
+         # Prefer Quarto's bundled pandoc when available so the conversion
+         # matches Quarto defaults, but fall back to a standalone pandoc
+         # installation when Quarto is not on PATH.
+         quarto = shutil.which("quarto")
+         pandoc = shutil.which("pandoc")
+         if quarto:
+             cmd = [quarto, "pandoc"]
+         elif pandoc:
+             cmd = [pandoc]
+         else:
+             print("Neither quarto nor pandoc found on PATH", file=sys.stderr)
+             sys.exit(1)
+         cmd += [
+             pre_path,
+             "-f",
+             "latex",
+             "-t",
+             "markdown",
+             "--wrap=none",
+             "-o",
+             mid_path,
+         ]
+         subprocess.run(cmd, check=True)
+     finally:
+         Path(pre_path).unlink(missing_ok=True)
+
+     mid_text = Path(mid_path).read_text()
+     Path(mid_path).unlink(missing_ok=True)
+
+     clean_text = clean_html_escapes(mid_text)
+     final_text = finalize_markers(clean_text)
+     formatted = format_tags(final_text)
+     out_path = base.with_suffix(".qmd")
+     out_path.write_text(formatted)
+     print(f"Wrote {out_path}")
+     return out_path
+
+
+ def main():
+     if len(sys.argv) != 2:
+         print("Usage: tex_to_qmd.py file.tex", file=sys.stderr)
+         sys.exit(1)
+     tex2qmd(sys.argv[1])
+
+
+ if __name__ == "__main__":
+     main()
+
+
+ # Maintain the previous helper name for any existing imports.
+ convert_tex_to_qmd = tex2qmd
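
A minimal sketch of what the observation-macro rewrite above produces, assuming the file is importable; the module path pydifftools.tex_to_qmd and the sample LaTeX text are assumptions, since only the functions themselves appear in this diff:

    # Hypothetical usage -- the module path is assumed, not shown in this diff.
    from pydifftools.tex_to_qmd import preprocess_latex

    sample = r"\o[10:30 (AB)]{baseline drift looks stable}"
    print(preprocess_latex(sample))
    # -> <obs time="10:30" author="AB">baseline drift looks stable</obs>
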
@@ -0,0 +1,149 @@
+ # again rerun
+ from lxml import html, etree
+ import os
+ from matlablike import *
+ from unidecode import unidecode
+ import re
+ import sys
+
+ fp = open(sys.argv[1], "r")
+ paragraphcommands_re = re.compile(r"^ *\\(sub)*paragraph{.*}")
+ commentline_re = re.compile(r"^ *%")
+ beginlatex_re = re.compile(r"^[^#]*\\begin{document}(.*)")
+ endlatex_re = re.compile(r"^([^#]*)\\end{document}.*")
+ commandstart_re = re.compile(r"(\\[a-zA-Z]+[\[{])")
+ word_citation_re = re.compile(r"(\[[0-9 ,]+\][,\.)]*)")
+ tex_citation_re = re.compile(r"(.*)(\\cite{[a-zA-Z0-9,_]+}[,\.)]*)(.*)$")
+ tex_ref_re = re.compile(r"(.*)(\\c*ref{[a-zA-Z0-9,_:\-]+}[,\.)]*)(.*)$")
+ text_list = []
+ if sys.argv[1][-4:] == ".tex":
+     latex_file = True
+ else:
+     latex_file = False
+ found_beginning = False
+ start_line = 0
+ end_line = 0
+ print("opened", sys.argv[1])
+ # {{{ pull out just the part between the document text
+ j = 0
+ for thisline in fp:
+     thisline = thisline.replace("\xa0", " ")  # because word sucks
+     thisline = thisline.replace(
+         "\x93", "``"
+     )  # this and following are just pulled from vim
+     thisline = thisline.replace("\x94", "''")
+     thisline = thisline.replace("\x96", "--")
+     j += 1
+     if latex_file:
+         if not found_beginning:
+             thismatch = beginlatex_re.match(thisline)
+             if thismatch:
+                 text_list.append(thismatch.groups()[0].rstrip())
+                 found_beginning = True
+                 start_line = j + 1
+                 print("Found the beginning at line", start_line)
+         else:
+             thismatch = endlatex_re.match(thisline)
+             if thismatch:
+                 text_list.append(thismatch.groups()[0].rstrip())
+                 print("Found the end")
+                 end_line = j
+                 print("Found the end at line", end_line)
+             text_list.append(thisline)
+     else:
+         text_list.append(
+             thisline.replace("$$", "")
+         )  # no better place to check for the tex dollar sign double-up
+ if end_line == 0:
+     end_line = len(text_list)
+ fp.close()
+ j = 0
+ while j < len(
+     text_list
+ ):  # first, put citations on their own line, so I can next treat them as special lines
+     thismatch = tex_citation_re.match(text_list[j])
+     othermatch = tex_ref_re.match(text_list[j])
+     if othermatch:
+         thismatch = othermatch
+     if thismatch:
+         text_list.pop(j)
+         text_list.insert(
+             j, thismatch.groups()[2]
+         )  # push on backwards, so it shows up in the right order
+         text_list.insert(
+             j,
+             thismatch.groups()[1].replace(" ", "\n%SPACE%\n")
+             + "%NONEWLINE%\n",
+         )  # since these are "fake" newlines, make sure they don't get broken! -- also to preserve spaces, I'm pre-processing the spacing here
+         text_list.insert(
+             j,
+             thismatch.groups()[0].replace(" ", "\n%SPACE%\n")
+             + "%NONEWLINE%\n",
+         )
+         print(
+             "found citation or reference, broke line:",
+             text_list[j],
+             text_list[j + 1],
+             text_list[j + 2],
+         )
+         print("---")
+         j += 1  # so that we skip the citation we just added
+         end_line += 2  # because we added two lines
+     j += 1
+ for j in range(0, len(text_list)):
+     thismatch = paragraphcommands_re.match(text_list[j])
+     if thismatch:
+         text_list[j] = text_list[j].replace(
+             "\n", "%NEWLINE%\n"
+         )  # these lines are protected/preserved from being chopped up, since they are invisible
+         print("found paragraph line:", text_list[j])
+     else:
+         thismatch = tex_citation_re.match(text_list[j])
+         if not thismatch:
+             thismatch = tex_ref_re.match(text_list[j])
+         if thismatch:
+             print("found citation line:", text_list[j])
+         else:
+             text_list[j] = text_list[j].replace("~", "\n~\n")
+             text_list[j] = commandstart_re.sub("\\1\n", text_list[j])
+             text_list[j] = word_citation_re.sub("\n\\1\n", text_list[j])
+             text_list[j] = text_list[j].replace("}", "\n}\n")
+             text_list[j] = text_list[j].replace("]{", "\n]{\n")
+             text_list[j] = text_list[j].replace(" ", "\n%SPACE%\n")
+             if text_list[j][-12:] == "%NONEWLINE%\n":
+                 print("trying to drop NONEWLINE going from:")
+                 print(text_list[j])
+                 text_list[j] = text_list[j][:-12] + "\n"
+                 print("to:\n", text_list[j])
+             else:
+                 print("line ends in:", text_list[j][-12:])
+                 text_list[j] += "%NEWLINE%\n"
+             text_list[j] = text_list[j].replace("\r", "\n%NEWLINE%\n")
+ # }}}
+ # {{{ write out the result
+ outputtext = "".join(text_list)
+ outputtext = outputtext.split("\n")
+ outputtext = [j for j in outputtext if len(j) > 0]
+ if not latex_file:  # easier to just strip the tags here
+     print("this is not a latex file")
+     outputtext = [j for j in outputtext if j != "%SPACE%" and j != "%NEWLINE%"]
+ else:
+     print("this is a latex file")
+     outputtex = "".join(
+         text_list[start_line:end_line]
+     )  # up to but not including the end document
+     outputtex = outputtex.split("\n")
+     outputtex = [j for j in outputtex if len(j) > 0]
+     outputtex = [
+         j for j in outputtex if j[0] != "%"
+     ]  # takes care of space and newline as well as tex comments
+ newfile = re.sub(r"(.*)(\..*)", r"\1_1word\2", sys.argv[1])
+ fp = open(newfile, "w")
+ fp.write("\n".join(outputtext))
+ fp.close()
+ if latex_file:
+     newfile = re.sub(r"(.*)(\..*)", r"\1_1wordstripped\2", sys.argv[1])
+     fp = open(newfile, "w")
+     fp.write("\n".join(outputtex))
+     fp.close()
+ # }}}
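
A rough illustration of the citation-splitting step above; the regex is copied from the script, while the input sentence is invented:

    import re

    tex_citation_re = re.compile(r"(.*)(\\cite{[a-zA-Z0-9,_]+}[,\.)]*)(.*)$")
    line = r"as shown previously \cite{smith_2020}, the effect persists"
    # groups(): text before, the intact \cite{...} token, text after;
    # the script writes each piece on its own line so that a word-level
    # diff never splits the citation itself.
    print(tex_citation_re.match(line).groups())
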
@@ -0,0 +1,54 @@
+ # again rerun
+ from lxml import html, etree
+ import os
+ from matlablike import *
+ from unidecode import unidecode
+ import re
+ import sys
+
+ fp = open(sys.argv[1], "r")
+ needsspace_re = re.compile(r'(\w[):;"\-\.,!?}]*) +(["(]*\w)')
+ paragraphcommands_re = re.compile(r"^ *\\(sub)*paragraph{.*}")
+ commentline_re = re.compile(r"^ *%")
+ normalline_re = re.compile(r"^\(%SPACE%\)\|\(%NEWLINE%\)")
+ notweird_re = re.compile(r"^(%SPACE%)|(%\[ORIG%)|(%ORIG\]\[NEW%)|(%NEW\]%)")
+ text_list = []
+ found_beginning = False
+ print("opened", sys.argv[1])
+ # {{{ pull out just the part between the document text
+ for thisline in fp:
+     if (
+         (thisline[:7] == "<<<<<<<")
+         or (thisline[:7] == "=======")
+         or (thisline[:7] == ">>>>>>>")
+     ):
+         text_list.append(
+             "%NEWLINE% %CONFLICT%" + thisline.strip() + "%NEWLINE%"
+         )
+     else:
+         text_list.append(thisline.rstrip())
+ fp.close()
+ text_list = [x.replace("%NEWLINE%", "\n") for x in text_list]
+ # {{{ don't mess with the "special" lines
+ for j, thisline in enumerate(text_list):
+     if not notweird_re.match(thisline):
+         if paragraphcommands_re.match(thisline) or commentline_re.match(
+             thisline
+         ):
+             print("found special line '", thisline, "'")
+             text_list[j] = thisline.replace(" ", " %SPACE% ")
+ # }}}
+ text_list = " ".join(text_list)
+ text_list = needsspace_re.sub(r"\1 %SPACE% \2", text_list)
+ text_list = needsspace_re.sub(
+     r"\1 %SPACE% \2", text_list
+ )  # again to catch the single letter ones
+ text_list = text_list.replace(" ", "")
+ text_list = text_list.replace("%SPACE%", " ")
+ # {{{ write out the result
+ newfile = re.sub(r"(.*)(\..*)", r"\1_1wordcollapse\2", sys.argv[1])
+ fp = open(newfile, "w")
+ outputtext = "".join(text_list)
+ fp.write(outputtext)
+ fp.close()
+ # }}}
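
A quick check of the needsspace_re round trip used above: real inter-word spaces are marked with %SPACE%, every remaining space is deleted, and the markers are turned back into single spaces. The regex is copied from the script; the sample sentence is made up:

    import re

    needsspace_re = re.compile(r'(\w[):;"\-\.,!?}]*) +(["(]*\w)')
    joined = "stable baseline (see above), repeated twice"
    marked = needsspace_re.sub(r"\1 %SPACE% \2", joined)
    marked = needsspace_re.sub(r"\1 %SPACE% \2", marked)  # second pass catches single-letter words
    print(marked.replace(" ", "").replace("%SPACE%", " "))
    # -> stable baseline (see above), repeated twice
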
pydifftools/outline.py ADDED
@@ -0,0 +1,173 @@
+ import pickle
+ from .doc_contents import doc_contents_class
+ import re
+
+ from .command_registry import register_command
+
+
+ def _write_outline_files(all_contents, basename):
+     # save the reordered data and user-editable outline sidecar
+     with open(f"{basename}_outline.pickle", "wb") as fp:
+         pickle.dump(all_contents, fp)
+     with open(f"{basename}_outline.md", "w", encoding="utf-8") as fp:
+         fp.write(all_contents.outline)
+
+
+ def extract_outline(filename):
+     basename = filename.replace(".tex", "")
+     section_re = re.compile(
+         r"\\(paragraph|subparagraph|subsubsection|subsection|section)\{"
+     )
+
+     all_contents = doc_contents_class()
+     bracelevel = 0
+     with open(filename, "r", encoding="utf-8") as fp:
+         for thisline in fp:
+             if bracelevel == 0:
+                 thismatch = section_re.match(thisline)
+                 if thismatch:
+                     sectype = thismatch.groups()[0]
+                     bracelevel = 1
+                     all_contents += thisline[: thismatch.start()]
+                     escaped = False
+                     thistitle = ""
+                 else:
+                     all_contents += thisline
+             if bracelevel > 0:
+                 # do this whether the brace was left open on a previous line
+                 # or was just opened on this one
+                 for n, j in enumerate(thisline[thismatch.end() :]):
+                     if escaped:
+                         escaped = False
+                     elif j == "\\":
+                         escaped = True
+                     elif j == "{":
+                         bracelevel += 1
+                     elif j == "}":
+                         bracelevel -= 1
+                     if bracelevel > 0:
+                         thistitle += j
+                     else:
+                         all_contents.start_sec(sectype, thistitle)
+                         all_contents += thisline[thismatch.end() + n + 1 :]
+                         break
+                 else:  # hit the end of the line without the break
+                     thisline += "\n"
+     _write_outline_files(all_contents, basename)
+
+
+ def _reorder_from_outline(targetfile, extension, format_type):
+     # rebuild a file based on the user-adjusted outline list
+     markdownfile = targetfile.replace(extension, "_outline.md")
+     picklefile = targetfile.replace(extension, "_outline.pickle")
+     if not (
+         markdownfile.endswith(".md")
+         and picklefile.endswith(".pickle")
+         and targetfile.endswith(extension)
+     ):
+         raise ValueError(f"target file must end with {extension}")
+
+     with open(picklefile, "rb") as fp:
+         all_contents = pickle.load(fp)
+     all_contents.set_format(format_type)
+     with open(markdownfile, "r", encoding="utf-8") as fp:
+         for thisline in fp:
+             all_contents.outline_in_order(thisline.rstrip())
+     with open(targetfile, "w", encoding="utf-8", newline="\n") as fp:
+         fp.write(str(all_contents))
+
+
+ @register_command(
+     "Save tex file as outline, with filename_outline.pickle storing content"
+     " and filename_outline.md giving outline.",
+ )
+ def xo(arguments):
+     assert len(arguments) == 1
+     extract_outline(arguments[0])
+
+
+ @register_command(
+     "Save markdown file as outline, with filename_outline.pickle storing"
+     " content and filename_outline.md giving outline.",
+ )
+ def xomd(arguments):
+     assert len(arguments) == 1
+     filename = arguments[0]
+     # read a markdown file and capture headings while keeping content for
+     # reordering
+     basename = filename.replace(".md", "")
+     header_re = re.compile(r"^(#{1,6})\s+(.*)")
+     underline_re = {
+         "section": re.compile(r"^={3,}\s*$"),
+         "subsection": re.compile(r"^-{3,}\s*$"),
+     }
+
+     all_contents = doc_contents_class("markdown")
+     previous_line = None
+     in_code_block = False
+     with open(filename, "r", encoding="utf-8") as fp:
+         for thisline in fp:
+             stripped = thisline.rstrip("\n")
+             if stripped.startswith("```"):
+                 in_code_block = not in_code_block
+                 all_contents += thisline
+                 previous_line = None
+                 continue
+             if in_code_block:
+                 all_contents += thisline
+                 continue
+             thismatch = header_re.match(stripped)
+             if thismatch:
+                 hashes, thistitle = thismatch.groups()
+                 level = len(hashes)
+                 if level == 1:
+                     all_contents.start_sec("section", thistitle.strip())
+                 elif level == 2:
+                     all_contents.start_sec("subsection", thistitle.strip())
+                 elif level == 3:
+                     all_contents.start_sec("subsubsection", thistitle.strip())
+                 elif level == 4:
+                     all_contents.start_sec("paragraph", thistitle.strip())
+                 else:
+                     all_contents.start_sec("subparagraph", thistitle.strip())
+                 all_contents += "\n"
+                 previous_line = None
+                 continue
+             if previous_line is not None:
+                 if underline_re["section"].match(stripped):
+                     all_contents.start_sec("section", previous_line.strip())
+                     previous_line = None
+                     continue
+                 if underline_re["subsection"].match(stripped):
+                     all_contents.start_sec("subsection", previous_line.strip())
+                     previous_line = None
+                     continue
+                 all_contents += previous_line + "\n"
+             previous_line = stripped
+     if previous_line:
+         all_contents += previous_line + "\n"
+     _write_outline_files(all_contents, basename)
+
+
+ @register_command(
+     "Use the modified filename_outline.md to write reordered text",
+     help={"texfile": "TeX file to regenerate from its outline files"},
+ )
+ def xoreorder(texfile):
+     """Rewrite a TeX file using its saved outline and ordering hints."""
+
+     _reorder_from_outline(texfile, ".tex", "latex")
+
+
+ @register_command(
+     "Rewrite a markdown file using its saved outline and ordering hints",
+     help={"mdfile": "Markdown file to regenerate from its outline files"},
+ )
+ def xomdreorder(mdfile):
+     """Rewrite a Markdown file using its saved outline and ordering hints."""
+
+     _reorder_from_outline(mdfile, ".md", "markdown")
+
+
+ # Provide the previous function name for callers expecting it.
+ write_reordered = xoreorder
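
A sketch of the intended outline round trip. Only the Python functions appear in this diff, so the sketch calls them directly; the file name notes.tex is made up:

    from pydifftools.outline import extract_outline, xoreorder

    extract_outline("notes.tex")  # writes notes_outline.pickle and notes_outline.md
    # ...reorder the section headings in notes_outline.md by hand...
    xoreorder("notes.tex")        # rewrites notes.tex in the new section order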