pyDiffTools 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydifftools/__init__.py +11 -0
- pydifftools/check_numbers.py +70 -0
- pydifftools/command_line.py +747 -0
- pydifftools/command_registry.py +65 -0
- pydifftools/comment_functions.py +39 -0
- pydifftools/continuous.py +194 -0
- pydifftools/copy_files.py +75 -0
- pydifftools/diff-doc.js +193 -0
- pydifftools/doc_contents.py +147 -0
- pydifftools/flowchart/__init__.py +15 -0
- pydifftools/flowchart/dot_to_yaml.py +114 -0
- pydifftools/flowchart/graph.py +620 -0
- pydifftools/flowchart/watch_graph.py +168 -0
- pydifftools/html_comments.py +33 -0
- pydifftools/html_uncomments.py +524 -0
- pydifftools/match_spaces.py +235 -0
- pydifftools/notebook/__init__.py +0 -0
- pydifftools/notebook/fast_build.py +1502 -0
- pydifftools/notebook/tex_to_qmd.py +319 -0
- pydifftools/onewordify.py +149 -0
- pydifftools/onewordify_undo.py +54 -0
- pydifftools/outline.py +173 -0
- pydifftools/rearrange_tex.py +188 -0
- pydifftools/searchacro.py +80 -0
- pydifftools/separate_comments.py +73 -0
- pydifftools/split_conflict.py +213 -0
- pydifftools/unseparate_comments.py +69 -0
- pydifftools/update_check.py +31 -0
- pydifftools/wrap_sentences.py +501 -0
- pydifftools/xml2xlsx.vbs +33 -0
- pydifftools-0.1.8.dist-info/METADATA +146 -0
- pydifftools-0.1.8.dist-info/RECORD +36 -0
- pydifftools-0.1.8.dist-info/WHEEL +5 -0
- pydifftools-0.1.8.dist-info/entry_points.txt +2 -0
- pydifftools-0.1.8.dist-info/licenses/LICENSE.md +28 -0
- pydifftools-0.1.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
import itertools
|
|
4
|
+
|
|
5
|
+
from .command_registry import register_command
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def match_paren(thistext, pos, opener="{"):
|
|
9
|
+
closerdict = {
|
|
10
|
+
"{": "}",
|
|
11
|
+
"(": ")",
|
|
12
|
+
"[": "]",
|
|
13
|
+
"$$": "$$",
|
|
14
|
+
"~~~": "~~~",
|
|
15
|
+
"<!--": "-->",
|
|
16
|
+
}
|
|
17
|
+
if opener in closerdict.keys():
|
|
18
|
+
closer = closerdict[opener]
|
|
19
|
+
else:
|
|
20
|
+
m = re.match(r"<(\w+)", opener)
|
|
21
|
+
assert m
|
|
22
|
+
closer = "</" + m.groups()[0]
|
|
23
|
+
if thistext[pos : pos + len(opener)] == opener:
|
|
24
|
+
parenlevel = 1
|
|
25
|
+
else:
|
|
26
|
+
raise ValueError(
|
|
27
|
+
f"You aren't starting on a '{opener}':"
|
|
28
|
+
+ thistext[:pos]
|
|
29
|
+
+ ">>>>>"
|
|
30
|
+
+ thistext[pos:]
|
|
31
|
+
)
|
|
32
|
+
while parenlevel > 0 and pos < len(thistext):
|
|
33
|
+
pos += 1
|
|
34
|
+
if thistext[pos : pos + len(closer)] == closer:
|
|
35
|
+
if thistext[pos - 1] != "\\":
|
|
36
|
+
parenlevel -= 1
|
|
37
|
+
elif thistext[pos : pos + len(opener)] == opener:
|
|
38
|
+
if thistext[pos - 1] != "\\":
|
|
39
|
+
parenlevel += 1
|
|
40
|
+
if pos == len(thistext):
|
|
41
|
+
raise RuntimeError(
|
|
42
|
+
f"hit end of file without closing {opener} with {closer}\n"
|
|
43
|
+
"here is the offending text!:\n" + ("=" * 30) + thistext
|
|
44
|
+
)
|
|
45
|
+
return pos
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@register_command(
    "wrap with indented sentence format (for markdown or latex).",
    "wrap with indented sentence format (for markdown or latex).\n"
    "Optional flag --cleanoo cleans latex exported from\n"
    "OpenOffice/LibreOffice\n"
    "Optional flag -i # specifies indentation level for subsequent\n"
    "lines of a sentence (defaults to 4 -- e.g. for markdown you\n"
    "will always want -i 0)",
    help={
        "filename": "Input file to wrap. Use '-' to read from stdin.",
        "cleanoo": "Strip LibreOffice markup before wrapping.",
        "i": "Indentation level for wrapped lines.",
    },
)
def wr(filename, wrapnumber=45, punctuation_slop=20, cleanoo=False, i=-1):
    """Re-wrap a latex/markdown file with one sentence per (short) line.

    The text is split into paragraphs, then sentences, then words, and
    re-wrapped to roughly ``wrapnumber`` characters per line with
    continuation lines indented by ``indent_amount``.  Spans that must
    not be re-wrapped (latex section commands, equation/align
    environments, markdown headers, figures, tables, ``$$`` blocks,
    ``~~~`` fences, HTML-tagged regions, and the YAML header) are
    collected in ``exclusion_idx`` first and passed through untouched.
    The result overwrites *filename* in place, or is printed to stdout
    when reading from stdin.

    Parameters
    ----------
    filename : str
        File to wrap; "-" means read stdin and write stdout.
    wrapnumber : int
        Target line width in characters.
    punctuation_slop : int
        How much shorter than ``wrapnumber`` a line may be in order to
        end on punctuation (comma, semicolon, etc.).
    cleanoo : bool
        Strip OpenOffice/LibreOffice latex-export artifacts first.
    i : int
        Continuation-line indent; -1 means "default" (4 for latex,
        0 for markdown).
    """
    indent_amount = i if i != -1 else 4
    stupid_strip = cleanoo
    if filename == "-":
        filename = None
    # {{{ load the file
    if filename is not None:
        with open(filename, encoding="utf-8") as fp:
            alltext = fp.read()
        # {{{ determine if the filetype is latex or markdown
        file_extension = filename.split(".")[-1]
        # NOTE(review): if the extension is none of tex/md/qmd,
        # ``filetype`` is never assigned and the check below raises
        # NameError -- confirm whether other extensions should error
        # out more gracefully.
        if file_extension == "tex":
            filetype = "latex"
        elif file_extension == "md":
            # print("identified as markdown!!")
            filetype = "markdown"
        elif file_extension == "qmd":
            # print("identified as markdown!!")
            filetype = "markdown"
        if filetype == "markdown":
            if i == -1:
                indent_amount = 0  # markdown default: no hanging indent
        # }}}
    else:
        # stdin input is always treated as latex
        sys.stdin.reconfigure(encoding="utf-8")
        fp = sys.stdin
        alltext = fp.read()
        filetype = "latex"
    # }}}
    # {{{ strip stupid commands that appear in openoffice conversion
    if stupid_strip:
        alltext = re.sub(r"\\bigskip\b\s*", "", alltext)
        alltext = re.sub(r"\\;", "", alltext)
        alltext = re.sub(r"(?:\\ ){4}", r"\quad ", alltext)
        alltext = re.sub(r"\\ ", " ", alltext)
        # alltext = re.sub('\\\\\n',' ',alltext)
        # {{{ remove select language an accompanying bracket
        m = re.search(r"{\\selectlanguage{english}", alltext)
        while m:
            stop_bracket = match_paren(alltext, m.start(), "{")
            alltext = (
                alltext[: m.start()]
                + alltext[m.end() : stop_bracket]
                + alltext[stop_bracket + 1 :]
            )  # pos is the position of
            # the matching curly bracket
            m = re.search(r"{\\selectlanguage{english}", alltext)
        # }}}
        # {{{ remove the remaining select languages
        m = re.search(r"\\selectlanguage{english}", alltext)
        while m:
            alltext = alltext[: m.start()] + alltext[m.end() :]
            m = re.search(r"\\selectlanguage{english}", alltext)
        # }}}
        # {{{ remove mathit
        m = re.search(r"\\mathit{", alltext)
        while m:
            # print("-------------")
            # print(alltext[m.start() : m.end()])
            # print("-------------")
            stop_bracket = match_paren(alltext, m.end() - 1, "{")
            alltext = (
                alltext[: m.start()]
                + alltext[m.end() : stop_bracket]
                + alltext[stop_bracket + 1 :]
            )  # pos is the position of
            # the matching curly bracket
            m = re.search(r"\\mathit{", alltext)
        # }}}
    # }}}
    alltext = alltext.split("\n\n")  # split paragraphs
    # interleave with blank strings that get turned into double newlines
    alltext = [k for l_inner in [[j, ""] for j in alltext] for k in l_inner]
    # exclusion_idx collects (paragraph index, first line, last line)
    # triples for line ranges that must pass through unwrapped
    exclusion_idx = []
    for para_idx in range(len(alltext)):
        thispara_split = alltext[para_idx].split("\n")
        if filetype == "latex":
            line_idx = 0
            while line_idx < len(thispara_split):
                # {{{ exclude section headers and environments
                thisline = thispara_split[line_idx]
                m = re.match(
                    r"\\(?:section|subsection|subsubsection|paragraph|"
                    + "newcommand|input){",
                    thisline,
                )
                if m:
                    # NOTE(review): starting_line is assigned the line
                    # *text* here, while every other exclusion stores a
                    # line *index*; the slice in the grouping pass below
                    # would raise TypeError on this tuple -- looks like
                    # it should be ``line_idx``.  Confirm upstream.
                    starting_line = thisline
                    remaining_in_para = "\n".join(thispara_split[line_idx:])
                    pos = match_paren(remaining_in_para, m.span()[-1], "{")
                    # to find the closing line, I need to find the line number
                    # inside alltext[para_idx] that corresponds to the
                    # character position pos. Do this by counting the number
                    # of newlines between the character len(m.group()) and pos
                    closing_line = (
                        remaining_in_para[m.span()[-1] : pos].count("\n")
                        + line_idx
                    )
                    exclusion_idx.append(
                        (para_idx, starting_line, closing_line)
                    )
                    line_idx = closing_line
                    # print("*" * 30, "excluding", "*" * 30)
                    # print(thispara_split[starting_line:closing_line])
                    # print("*" * 69)
                else:
                    m = re.search(r"\\begin{(equation|align)}", thisline)
                    if m:
                        # exclude everything until the end of the environment
                        # to do this, I need to make a new string that gives
                        # everything from here until the end of
                        # alltext[para_idx]
                        notfound = True
                        for closing_idx, closing_line in enumerate(
                            thispara_split[line_idx:]
                        ):
                            m_close = re.search(
                                r"\\end{" + m.group(1) + "}", closing_line
                            )
                            if m_close:
                                notfound = False
                                break
                        if notfound:
                            raise RuntimeError(
                                "didn't find closing line for environment"
                            )
                        exclusion_idx.append(
                            (para_idx, line_idx, line_idx + closing_idx)
                        )
                        # print("*" * 30, "excluding env", "*" * 30)
                        # print(thispara_split[line_idx:closing_idx])
                        # print("*" * 73)
                        line_idx = line_idx + closing_idx
                line_idx += 1
                # }}}
        elif filetype == "markdown":
            line_idx = 0
            if para_idx == 0 and line_idx == 0:
                # watch out for yaml header
                # print("first line is", thispara_split[line_idx])
                if thispara_split[line_idx].startswith(
                    "---"
                ) or thispara_split[line_idx].startswith("..."):
                    starting_line = line_idx
                    j = 1
                    while j < len(thispara_split):
                        if (
                            thispara_split[j].strip() == "---"
                            or thispara_split[j].strip() == "..."
                        ):
                            closing_line = j
                            exclusion_idx.append(
                                (para_idx, starting_line, closing_line)
                            )
                            break
                        j += 1
            while line_idx < len(thispara_split):
                thisline = thispara_split[line_idx]
                # {{{ do the same thing for markdown, where I exclude (1)
                # headers (2) figures and (3) tables (4) font
                m = re.match(r"#+\s.*", thisline)  # exclude headers
                if m:
                    exclusion_idx.append((para_idx, line_idx, line_idx))
                    # print("*" * 30, "excluding header", "*" * 30)
                    # print(thispara_split[line_idx])
                    # print("*" * 73)
                else:
                    m = re.search(r"!\[.*\]\(", thisline)  # exclude figures
                    if m:
                        # {{{ find the closing ), as we did for latex commands
                        # above
                        remaining_in_para = "\n".join(
                            thispara_split[line_idx:]
                        )
                        pos = match_paren(
                            remaining_in_para, m.span()[-1] - 1, "("
                        )
                        closing_line = (
                            remaining_in_para[m.span()[-1] : pos].count("\n")
                            + line_idx
                        )
                        exclusion_idx.append(
                            (para_idx, line_idx, closing_line)
                        )
                        line_idx = closing_line
                        # }}}
                    else:
                        m = re.search(
                            r"(\|.*\||=\+==|-\+--)", thisline
                        )  # exclude tables
                        if m:
                            starting_line = line_idx
                            # NOTE(review): if a table-looking line is the
                            # last line of the paragraph, the
                            # ``line_idx + 1`` lookup below raises
                            # IndexError -- confirm.
                            m2 = re.search(
                                r"(\|.*\||=\+==|-\+--)",
                                thispara_split[line_idx + 1],
                            )  # need at least 2 lines
                            if m2:
                                # advance to the last consecutive table line
                                while True:
                                    line_idx += 1
                                    if line_idx > len(thispara_split) - 1:
                                        line_idx -= 1
                                        break
                                    thisline = thispara_split[line_idx]
                                    m = re.search(
                                        r"(\|.*\||=\+==|-\+--)", thisline
                                    )
                                    if not m:
                                        line_idx -= 1
                                        break
                                exclusion_idx.append(
                                    (para_idx, starting_line, line_idx)
                                )
                                # print("*" * 30, "excluding table", "*" * 30)
                                # print(
                                #     thispara_split[
                                #         starting_line : line_idx + 1
                                #     ]
                                # )
                                # print("*" * 73)
                        else:
                            m = re.search(
                                r"\$\$", thisline
                            )  # exclude equations
                            if m:
                                starting_line = line_idx
                                # {{{ find the closing $$, as we did for latex
                                # commands above
                                remaining_in_para = "\n".join(
                                    thispara_split[line_idx:]
                                )
                                pos = match_paren(
                                    remaining_in_para, m.span()[-1] - 2, "$$"
                                )
                                closing_line = (
                                    remaining_in_para[
                                        m.span()[-1] : pos
                                    ].count("\n")
                                    + line_idx
                                )
                                exclusion_idx.append(
                                    (para_idx, line_idx, closing_line)
                                )
                                line_idx = closing_line
                                # }}}
                            else:
                                m = re.search(
                                    r"^~~~", thisline
                                )  # exclude code fences
                                if m:
                                    starting_line = line_idx
                                    # {{{ find the closing ~~~, as we did for
                                    # latex commands above
                                    remaining_in_para = "\n".join(
                                        thispara_split[line_idx:]
                                    )
                                    pos = match_paren(
                                        remaining_in_para, m.span()[0], "~~~"
                                    )
                                    closing_line = (
                                        remaining_in_para[
                                            m.span()[-1] : pos
                                        ].count("\n")
                                        + line_idx
                                    )
                                    exclusion_idx.append(
                                        (para_idx, line_idx, closing_line)
                                    )
                                    line_idx = closing_line
                                    # }}}
                                else:
                                    m = re.search(
                                        r"<(\w+) ?.*>", thisline
                                    )  # exclude things enclosed in tags
                                    if m:
                                        starting_line = line_idx
                                        # {{{ find the closing tag, as we did
                                        # for latex commands above
                                        remaining_in_para = "\n".join(
                                            thispara_split[line_idx:]
                                        )
                                        pos = match_paren(
                                            remaining_in_para,
                                            m.span()[0],
                                            "<" + m.groups()[0],
                                        )
                                        closing_line = (
                                            remaining_in_para[
                                                m.span()[-1] : pos
                                            ].count("\n")
                                            + line_idx
                                        )
                                        exclusion_idx.append(
                                            (para_idx, line_idx, closing_line)
                                        )
                                        line_idx = closing_line
                                        # }}}
                line_idx += 1
                # }}}
    # print("all exclusions:", exclusion_idx)
    all_text_procd = []
    for para_idx in range(len(alltext)):  # split paragraphs into sentences
        para_lines = alltext[para_idx].split("\n")
        # list comprehension to grab excluded lines for this paragraph
        excluded_lines = [j[1:] for j in exclusion_idx if j[0] == para_idx]
        # chunk para_lines into a list of tuples, where each tuple is a boolean
        # (False if excluded) and the line itself
        para_lines = [(True, j) for j in para_lines]
        for start_excl, stop_excl in excluded_lines:
            para_lines[start_excl : stop_excl + 1] = [
                (False, j[1]) for j in para_lines[start_excl : stop_excl + 1]
            ]
        # use join inside a list comprehension to gather contiguous chunks of
        # True and False together
        para_lines = [
            (key, "\n".join([j[1] for j in group]))
            for key, group in itertools.groupby(para_lines, lambda x: x[0])
        ]
        # print("here are the grouped para lines!----------------", para_lines)
        for notexcl, thiscontent in para_lines:
            if notexcl:
                # {{{ here I need a trick to prevent including short
                # abbreviations, etc
                # (a sentence terminator only counts when preceded by at
                # least 3 non-terminator characters)
                tempsent = re.split(r"([^\.!?]{3}[\.!?])[ \n]", thiscontent)
                # for j in tempsent:
                #     #rint("--", j)
                # {{{ put the "separators together with the preceding
                temp_paragraph = []
                for tempsent_num in range(0, len(tempsent), 2):
                    if tempsent_num + 1 < len(tempsent):
                        temp_paragraph.append(
                            tempsent[tempsent_num] + tempsent[tempsent_num + 1]
                        )
                    else:
                        temp_paragraph.append(tempsent[tempsent_num])
                # print("-------------------")
                thiscontent = []
                for this_sent in temp_paragraph:
                    # latex commands with a single-line argument become
                    # their own "sentence" so they land on their own line
                    thiscontent.extend(
                        re.split(
                            r"(\\(?:begin|end|usepackage|newcommand|section"
                            + "|subsection|subsubsection|paragraph"
                            + "|input){[^}]*})",
                            this_sent,
                        )
                    )
                # for this_sent in thiscontent:
                #     #rint("--sentence: ", this_sent)
                # }}}
                # }}}
                for sent_idx in range(
                    len(thiscontent)
                ):  # sentences into words
                    thiscontent[sent_idx] = [
                        word
                        for word in re.split("[ \n]+", thiscontent[sent_idx])
                        if len(word) > 0
                    ]
                if len(thiscontent) == 1 and len(thiscontent[0]) == 0:
                    # preserve blank separator paragraphs
                    all_text_procd += [(True, [[""]])]
                else:
                    all_text_procd += [(True, thiscontent)]
            else:
                all_text_procd += [(False, thiscontent)]
    alltext = all_text_procd
    # print("*" * 50 + "\n" + "parsed alltext" + "*" * 50)
    # print(alltext)
    # print("\n\n")
    # {{{ now that it's organized into paragraphs, sentences, and
    # words, wrap the sentences
    lines = []
    indentation = 0
    for para_idx in range(len(alltext)):  # paragraph number
        notexcl, para_content = alltext[para_idx]
        if notexcl:
            for residual_sentence in para_content:
                if residual_sentence == [""]:
                    indentation = 0
                    lines.append("")
                    continue
                if filetype == "latex":
                    indentation = 0
                while len(residual_sentence) > 0:
                    # Compute cumulative character counts without relying on
                    # numpy.
                    numchars = [len(word) + 1 for word in residual_sentence]
                    cumsum_num = []
                    running_total = 0
                    for num in numchars:
                        running_total += num
                        cumsum_num.append(running_total)
                    # break at the word whose cumulative width is closest
                    # to the target wrap width
                    nextline_upto = min(
                        range(len(cumsum_num)),
                        key=lambda j: abs(cumsum_num[j] - wrapnumber),
                    )
                    # prefer to break after punctuation when one falls
                    # within punctuation_slop of the target (10000 marks
                    # "no punctuation here")
                    nextline_punct_upto = []
                    for j, word in enumerate(residual_sentence):
                        if (
                            word[-1] in [",", ";", ":", ")", "-"]
                            and len(word) > 1
                        ):
                            nextline_punct_upto.append(cumsum_num[j])
                        else:
                            nextline_punct_upto.append(10000)
                    if any(value < 10000 for value in nextline_punct_upto):
                        nextline_punct_upto = min(
                            range(len(nextline_punct_upto)),
                            key=lambda j: abs(
                                nextline_punct_upto[j] - wrapnumber
                            ),
                        )
                        if nextline_punct_upto < nextline_upto:
                            if (
                                nextline_upto - nextline_punct_upto
                                < punctuation_slop
                            ):
                                nextline_upto = nextline_punct_upto
                    # print(
                    #     "-" * 10 + " here is the residual sentence:\n\t",
                    #     residual_sentence,
                    # )
                    lines.append(
                        " " * indentation
                        + " ".join(residual_sentence[: nextline_upto + 1])
                    )
                    residual_sentence = residual_sentence[nextline_upto + 1 :]
                    if indentation == 0:
                        indentation = indent_amount
        else:
            lines += [para_content]
            indentation = (
                0  # if excluded or new sentence, indentation goes back to zero
            )
    # print("here are lines!!\n\n\n\n", lines)
    # }}}
    if filename is None:
        print("\n".join(lines))
    else:
        # overwrite the input file in place
        fp = open(filename, "w", encoding="utf-8")
        fp.write("\n".join(lines))
        fp.close()
|
pydifftools/xml2xlsx.vbs
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
' Convert a spreadsheet file (by default XML Spreadsheet 2003) to another
' Excel format (by default .xlsx) by driving Excel through COM automation.
' Usage: xml2xls <source file> <destination file> [<input format> <output format>]
'from http://stackoverflow.com/questions/1858195/convert-xls-to-csv-on-command-line and https://msdn.microsoft.com/en-us/library/office/ff198017.aspx
if WScript.Arguments.Count < 2 Then
    WScript.Echo "Please specify the source and the destination files. Usage: xml2xls <xml source file> <xlsx destination file>"
    Wscript.Quit
End If

' Excel XlFileFormat constants
csv_format = 6
xlsx_format = 51
xml_spreadsheet = 46

' optional 3rd/4th arguments override the default input/output formats
if WScript.Arguments.Count = 4 Then
    input_format = WScript.Arguments.Item(2)
    output_format = WScript.Arguments.Item(3)
Else
    input_format = xml_spreadsheet
    output_format = xlsx_format
End If

' Excel needs absolute paths when driven through COM automation
Set objFSO = CreateObject("Scripting.FileSystemObject")

src_file = objFSO.GetAbsolutePathName(Wscript.Arguments.Item(0))
dest_file = objFSO.GetAbsolutePathName(WScript.Arguments.Item(1))

Dim oExcel
Set oExcel = CreateObject("Excel.Application")

Dim oBook
' NOTE(review): input_format is computed above but never passed to Open;
' Excel auto-detects the source format instead -- confirm this is intended.
Set oBook = oExcel.Workbooks.Open(src_file)

oBook.SaveAs dest_file, output_format

' close without re-saving, then quit Excel so the process exits
oBook.Close False
oExcel.Quit
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyDiffTools
|
|
3
|
+
Version: 0.1.8
|
|
4
|
+
Summary: Diff tools
|
|
5
|
+
Author: J M Franck
|
|
6
|
+
License: Copyright (c) 2015, jmfranck
|
|
7
|
+
All rights reserved.
|
|
8
|
+
|
|
9
|
+
Redistribution and use in source and binary forms, with or without
|
|
10
|
+
modification, are permitted provided that the following conditions are met:
|
|
11
|
+
|
|
12
|
+
* Redistributions of source code must retain the above copyright notice, this
|
|
13
|
+
list of conditions and the following disclaimer.
|
|
14
|
+
|
|
15
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
16
|
+
this list of conditions and the following disclaimer in the documentation
|
|
17
|
+
and/or other materials provided with the distribution.
|
|
18
|
+
|
|
19
|
+
* Neither the name of pyDiffTools nor the names of its
|
|
20
|
+
contributors may be used to endorse or promote products derived from
|
|
21
|
+
this software without specific prior written permission.
|
|
22
|
+
|
|
23
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
24
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
25
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
26
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
27
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
28
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
29
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
30
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
31
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
32
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
Description-Content-Type: text/x-rst
|
|
36
|
+
License-File: LICENSE.md
|
|
37
|
+
Requires-Dist: selenium
|
|
38
|
+
Requires-Dist: fuzzywuzzy[speedup]
|
|
39
|
+
Requires-Dist: PyYAML>=6.0
|
|
40
|
+
Requires-Dist: watchdog
|
|
41
|
+
Requires-Dist: pydot
|
|
42
|
+
Requires-Dist: python-dateutil
|
|
43
|
+
Requires-Dist: jinja2
|
|
44
|
+
Requires-Dist: nbformat
|
|
45
|
+
Requires-Dist: nbconvert
|
|
46
|
+
Requires-Dist: pygments
|
|
47
|
+
Requires-Dist: ansi2html
|
|
48
|
+
Requires-Dist: lxml
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
|
|
51
|
+
pydifftools
|
|
52
|
+
===========
|
|
53
|
+
|
|
54
|
+
:Info: See <https://github.com/jmfranck/pyDiffTools>
|
|
55
|
+
:Author: J. M. Franck <https://github.com/jmfranck>
|
|
56
|
+
|
|
57
|
+
.. _vim: http://www.vim.org
|
|
58
|
+
|
|
59
|
+
this is a set of tools to help with merging, mostly for use with vim_.
|
|
60
|
+
|
|
61
|
+
The scripts are accessed with the command ``pydifft``
|
|
62
|
+
|
|
63
|
+
included are (listed in order of fun/utility):
|
|
64
|
+
|
|
65
|
+
- `pydifft cpb <filename.md>` ("continuous pandoc build")
|
|
66
|
+
This continuously monitors
|
|
67
|
+
`filename.md`, builds the result,
|
|
68
|
+
and displays it in your browser.
|
|
69
|
+
|
|
70
|
+
Continuous pandoc build.
|
|
71
|
+
This works *very well* together
|
|
72
|
+
with the `g/` vim command
|
|
73
|
+
(supplied by our standard vimrc
|
|
74
|
+
gist) to search for phrases (for
|
|
75
|
+
example `g/ n sp me` to find "new
|
|
76
|
+
spectroscopic methodology" -- this
|
|
77
|
+
works *much better* than you
|
|
78
|
+
would expect)
|
|
79
|
+
|
|
80
|
+
For this to work, you need to
|
|
81
|
+
**install selenium with** `pip
|
|
82
|
+
install selenium` *not conda*.
|
|
83
|
+
Then go to `the selenium page <https://pypi.org/project/selenium/>`_
|
|
84
|
+
and download the chrome driver.
|
|
85
|
+
Note that from there, it can be hard to find the
|
|
86
|
+
chrome driver -- as of this update,
|
|
87
|
+
the drivers are `here <https://googlechromelabs.github.io/chrome-for-testing/#stable>`_,
|
|
88
|
+
but it seems like google is moving them around.
|
|
89
|
+
You also need to install `pandoc <https://pandoc.org/installing.html>`_
|
|
90
|
+
as well as `pandoc-crossref <https://github.com/lierdakil/pandoc-crossref>`_
|
|
91
|
+
(currently tested on windows with *version 3.5* of the former,
|
|
92
|
+
*not the latest installer*,
|
|
93
|
+
since crossref isn't built with the most recent version).
|
|
94
|
+
- `pydifft wgrph <graph.yaml>` watches a YAML flowchart description,
|
|
95
|
+
rebuilds the DOT/SVG output using GraphViz, and keeps a browser window
|
|
96
|
+
refreshed as you edit the file. This wraps the former
|
|
97
|
+
``flowchart/watch_graph.py`` script so all of its functionality is now
|
|
98
|
+
available through the main ``pydifft`` entry point.
|
|
99
|
+
- `pydifft tex2qmd file.tex` converts LaTeX sources to Quarto markdown.
|
|
100
|
+
The converter preserves custom observation blocks and errata tags while
|
|
101
|
+
translating verbatim/python environments into fenced code blocks so the
|
|
102
|
+
result is ready for the Pandoc-based builder.
|
|
103
|
+
- `pydifft qmdb [--watch] [--no-browser] [--webtex]` runs the relocated
|
|
104
|
+
``fast_build.py`` logic from inside the package. Without ``--watch`` it
|
|
105
|
+
performs a single build of the configured `_quarto.yml` targets into the
|
|
106
|
+
``_build``/``_display`` directories; with ``--watch`` it starts the HTTP
|
|
107
|
+
server and automatically rebuilds the staged fragments whenever you edit
|
|
108
|
+
a ``.qmd`` file.
|
|
109
|
+
- `pydifft qmdinit [directory]` scaffolds a new Quarto-style project using
|
|
110
|
+
the bundled templates and example ``project1`` hierarchy, then downloads
|
|
111
|
+
MathJax into ``_template/mathjax`` so the builder can run immediately.
|
|
112
|
+
This is analogous to ``git init`` for markdown notebooks.
|
|
113
|
+
- `pydifft wr <filename.tex|md>` (wrap)
|
|
114
|
+
This provides a standardized (and
|
|
115
|
+
short) line
|
|
116
|
+
wrapping, ideal for when you are
|
|
117
|
+
working on manuscripts that you
|
|
118
|
+
are version tracking with git.
|
|
119
|
+
- `pydifft wmatch` ("whitespace match"): a script that matches whitespace between two text files.
|
|
120
|
+
|
|
121
|
+
* pandoc can convert between markdown/latex/word, but doing this messes with your whitespace and gvimdiff comparisons.
|
|
122
|
+
|
|
123
|
+
* this allows you to use an original file with good whitespace formatting as a "template" that you can match other (e.g. pandoc converted file) onto another
|
|
124
|
+
|
|
125
|
+
- `pydifft wd` ("word diff"): generate "track changes" word files starting from pandoc markdown in a git history. Assuming that you have copied diff-doc.js (copied + licensed from elsewhere) into your home directory, this will use pandoc to convert the markdown files to MS Word, then use the MS Word comparison tool to generate a document where all relevant changes are shown with "track changes."
|
|
126
|
+
|
|
127
|
+
* by default, this uses the file `template.docx` in the current directory as a pandoc word template
|
|
128
|
+
|
|
129
|
+
- `pydifft sc` ("split conflicts"): a very basic merge tool that takes a conflicted file and generates a .merge_head and .merge_new file, where each conflict is split into its two competing versions
|
|
130
|
+
|
|
131
|
+
* you can use this directly with gvimdiff, you can use the files in a standard gvimdiff merge
|
|
132
|
+
|
|
133
|
+
* unlike the standard merge tool, it will
|
|
134
|
+
|
|
135
|
+
* less complex than the gvimdiff merge tool used with git.
|
|
136
|
+
|
|
137
|
+
* works with "onewordify," below
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
- a script that searches a notebook for numbered tasks, and sees whether or not they match (this is for organizing a lab notebook, to be described)
|
|
141
|
+
|
|
142
|
+
Future versions will include:
|
|
143
|
+
|
|
144
|
+
- Scripts for converting word html comments to latex commands.
|
|
145
|
+
|
|
146
|
+
- converting to/from one word per line files (for doing things like wdiff, but with more control)
|