pyDiffTools 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ # again rerun
2
+ from lxml import html, etree
3
+ import os
4
+ from pyspecdata import *
5
+ from unidecode import unidecode
6
+ import re
7
+ import sys
8
+ from .comment_functions import (
9
+ generate_alphabetnumber,
10
+ matchingbrackets,
11
+ comment_definition,
12
+ )
13
+
14
manual_math_conversion = (
    False  # this hacks some stuff that pandoc does much better
)


def _sub_until_stable(pattern, repl, text):
    """Apply ``re.sub(pattern, repl, ...)`` repeatedly until nothing changes.

    Each pass of the patterns used below fixes only one spot per style
    attribute, so the substitution has to be repeated until it is a no-op.
    """
    while True:
        updated = re.sub(pattern, repl, text)
        if updated == text:
            return text
        text = updated


# the context manager closes the input even if reading it fails
with open(sys.argv[1], "r") as fp:
    content = fp.read()
# comrefwithnewline_re = re.compile(r"('mso-comment-reference:[^']*)[\n ]+")
# {{{ need to remove weird linebreaks with the following, or it doesn't
#     interpret the styles correctly
content = re.sub(r":\n *", r":", content)
# strip spaces/newlines between 'mso-comment-reference: and the closing quote
content = _sub_until_stable(
    r"('mso-comment-reference:[^']*)[\n ]+", r"\1", content
)
# make sure every mso-comment-reference value ends with ";" before the quote
content = _sub_until_stable(
    r"('mso-comment-reference:[^'\"]*[^;])(['\"])", r"\1;\2", content
)
# Escape percent signs for LaTeX.  The negative lookbehind leaves an already
# escaped "\%" alone; the previous placeholder-based version rewrote the "%"
# characters of its own placeholder and turned "\%" into
# "\%EXPLICITPAREN\%".
content = re.sub(r"(?<!\\)%", r"\\%", content)
43
if manual_math_conversion:

    def _in_math(tex):
        """Wrap *tex* in the enter/leave math-mode placeholder markers."""
        return "%ENTERMATHMODE%" + tex + "%LEAVEMATHMODE%"

    # (text to find, replacement) pairs, applied strictly in this order:
    # " \xb5M" has to be handled before "\xb5M", and the "$$" cleanup has to
    # run last.
    _character_replacements = (
        ("Δ", _in_math(r"\Delta")),
        ("\xb0C", r"\degC "),
        (" \xb5M", r"\uM "),
        ("\xb5M", r"\uM "),
        ("α", _in_math(r"\alpha")),
        ("β", _in_math(r"\beta")),
        ("γ", _in_math(r"\gamma")),
        ("δ", _in_math(r"\delta")),
        ("ε", _in_math(r"\varepsilon")),
        ("ζ", _in_math(r"\zeta")),
        ("η", _in_math(r"\eta")),
        ("θ", _in_math(r"\theta")),
        ("ι", _in_math(r"\iota")),
        ("κ", _in_math(r"\kappa")),
        ("λ", _in_math(r"\lambda")),
        ("μ", _in_math(r"\mu")),
        ("ν", _in_math(r"\nu")),
        ("ξ", _in_math(r"\xi")),
        ("ο", _in_math(r"\omicron")),
        ("π", _in_math(r"\pi")),
        ("ρ", _in_math(r"\rho")),
        ("σ", _in_math(r"\sigma")),
        ("τ", _in_math(r"\tau")),
        ("φ", _in_math(r"\varphi")),
        ("χ", _in_math(r"\chi")),
        ("ψ", _in_math(r"\psi")),
        ("ω", _in_math(r"\omega")),
        # was \' -- that is a text-mode accent and is rejected in math mode
        ("′", _in_math(r"^{\prime}")),
        ("—", r"--"),
        ("’", r"'"),
        ("“", r"``"),
        ("”", r"''"),
        ("ℜ", _in_math(r"\Re")),
        ("⇒", _in_math(r"\Rightarrow")),
        ("⇐", _in_math(r"\Leftarrow")),
        # was \Sum -- LaTeX only defines the lower-case \sum
        ("∑", _in_math(r"\sum")),
        ("−", r"--"),
        ("∕", r"/"),
        ("∗", _in_math(r"^*")),
        ("∼", _in_math(r"\sim")),
        ("∝", _in_math(r"\propto")),
        ("∞", _in_math(r"\infty")),
        ("≈", _in_math(r"\approx")),
        ("≡", _in_math(r"\equiv")),
        ("≤", _in_math(r"\le")),
        ("≥", _in_math(r"\ge")),
        ("≪", _in_math(r"\ll")),
        ("≫", _in_math(r"\gg")),
        ("⋅", _in_math(r"\cdot")),
        ("𝔢", _in_math(r"\mathfrak{e}")),
        ("$$", ""),  # math symbols doubled back on each other
    )
    for _found, _replacement in _character_replacements:
        content = content.replace(_found, _replacement)
137
+ # }}}
138
+ # content = re.sub(r'mso-comment-reference:([a-zA-Z_0-9]+)&([a-zA-Z_0-9]+)',r'mso-comment-reference:\1AMPERSAND\2',content)
139
+ # content = re.sub(r'mso-comment-reference:[\n ]*([a-zA-Z0-9]+)',r'narg!mso-comment-reference:\1',content)
140
doc = html.fromstring(content)
# NOTE(review): neither of the next two patterns is used by the code below;
# commentlabel_re is rebound further down before its first use.
commentlabel_re = re.compile(r"\[([A-Z]+)([0-9])\]")
inlineequation_re = re.compile(r"\$([^\$]*)\$")
thisbody = doc.find("body")
print("I found the body", lsafen(thisbody))
num = 0  # number of comment-list containers seen
numcomments = 0  # number of individual comments seen
numcompara = 0  # paragraphs collected for the comment being processed
comment_dict = {}  # comment number (as a string) -> plain comment text
comment_label_re = re.compile(r"_com_([0-9]+)")
for j in doc.xpath('//*[contains(@style,"font-family:Symbol")]'):
    print('found symbol with text"', j.text, '" and dropped the tag')
    j.drop_tag()


def process_comment_text(thistag, numcompara, commenttext):
    """Collect the text of one comment paragraph.

    Reads the comment number from the ``msocomtxt`` element(s) of the comment
    currently being iterated (the module-level loop variable ``k``), removes
    the ``mso-special-character:comment`` spans from *thistag* and appends
    the transliterated text of *thistag* to *commenttext*.

    Parameters
    ----------
    thistag : element
        The paragraph element whose text is collected.
    numcompara : int
        Number of paragraphs collected so far for this comment.
    commenttext : list of str
        Accumulator for the paragraph texts; modified in place.

    Returns
    -------
    tuple
        ``(commentlabel, numcompara)``: the comment number as a string and
        the incremented paragraph count.

    Raises
    ------
    ValueError
        If a ``msocomtxt`` id cannot be parsed, or if the comment has no
        ``msocomtxt`` element at all (previously an ``UnboundLocalError``).
    """
    commentlabel = None
    for m in k.find_class("msocomtxt"):
        mymatch = comment_label_re.match(m.attrib["id"])
        if not mymatch:
            raise ValueError(
                "I don't understand what the comment id "
                + m.attrib["id"]
                + " means"
            )
        commentlabel = mymatch.groups()[0]
        print("that means it's comment", commentlabel)
    if commentlabel is None:
        raise ValueError(
            "I found comment text, but no msocomtxt element that gives"
            " the comment number"
        )
    numcompara += 1
    for m in thistag.xpath(
        'descendant-or-self::span[@style="mso-special-character:comment"]'
    ):
        m.drop_tree()
        print("dropped special character")
    commenttext.append(unidecode(thistag.text_content()))
    return commentlabel, numcompara


class_types = ["MsoCommentText", "MsoNormal", "indent", "noindent"]
for j in doc.xpath('//div[@style="mso-element:comment-list"]'):
    num += 1
    for k in j.xpath('descendant-or-self::*[@style="mso-element:comment"]'):
        numcomments += 1
        numcompara = 0
        commenttext = []
        found_something = False
        for class_type in class_types:
            for paragraph in k.find_class(class_type):
                commentlabel, numcompara = process_comment_text(
                    paragraph, numcompara, commenttext
                )
                found_something = True
        if not found_something:
            print(
                (
                    "Wargning: I found no "
                    + ",".join(class_types)
                    + " in this comment --\n%s\n -- in the future, should search by paragraph tag, instead"
                    % html.tostring(k, encoding="unicode")
                )
            )
            k.drop_tree()  # drop the stuff at the end
            # There is no label for this comment, so there is nothing to
            # store.  Falling through would reuse the previous comment's
            # label and overwrite its text with an empty string.
            continue
        k.drop_tree()  # drop the stuff at the end
        print(
            "for comment %d, I find %d paragraphs" % (numcomments, numcompara)
        )
        # and load into the dictionary
        comment_dict[commentlabel] = "\n\n".join(commenttext)
        print("text looks like this:", comment_dict[commentlabel])
print("I found %d comment lists and %d comments" % (num, numcomments))
225
# maps the initials Word puts in a comment anchor to the LaTeX macro prefix
initial_translation_dict = {
    "JF": "john",
    "y": "yuan",
    "CoLA&S": "peter",
    "SH": "songi",
    "PQ": "peter",
    "KE": "keith",
}
commentlabel_re = re.compile(r"\[([A-Za-z&]+)([0-9]+)\]")
commentid_re = re.compile(r"_anchor_([0-9]+)")
numcomrefs = 0  # anchors that could be parsed
numcomrefsrepd = 0  # highlighted spans that were replaced by a macro call
comment_file_text = ""  # accumulated macro definitions
current_comment_number = 0
# comments by "john" that start with this marker become \ntd... macros
_need_to_do_marker = "(need to do:) "
for thiscommentreference in doc.find_class("MsoCommentReference"):
    thiscommentreference.drop_tag()
for thiscommentreference in doc.find_class("msocomanchor"):
    comref_text = thiscommentreference.text
    if comref_text is None:
        print("Warning, found a comment with no text")
        continue
    m = commentlabel_re.match(comref_text)
    if not m:
        raise RuntimeError("Warning, I couldn't parse this!!")
    initials, number = m.groups()
    try:
        author = initial_translation_dict[initials]
    except KeyError:
        raise ValueError(
            "I don't know who %s is -- add to initial_translation_dict"
            % initials
        ) from None
    print("I found comment %s by %s" % (number, author))
    thiscommentreference.text = ""
    thiscommentreference.drop_tag()
    prevcomrefsrepd = numcomrefsrepd
    for k in doc.xpath(
        'descendant-or-self::*[contains(@style,"mso-comment-reference:%s_%s;")]'
        % (initials, number)
    ):
        print("\nThis reference has the text:", html.tostring(k))
        if k.text is None:
            k.text = ""
        if number not in comment_dict:
            raise KeyError(
                repr(number)
                + "is not in comment_dict keys: "
                + repr(list(comment_dict.keys()))
            )
        comment_body = comment_dict[number]
        if comment_body.startswith(_need_to_do_marker) and author == "john":
            # if it's a "need to do", strip the marker and use the ntd macro
            macro_family = "ntd"
            comment_body = comment_body[len(_need_to_do_marker):]
        else:
            macro_family = author
        macro_name = macro_family + generate_alphabetnumber(
            current_comment_number
        )
        # the highlighted text becomes the argument of the macro call
        k.text = (
            "\\"
            + macro_name
            + "{"
            + k.text_content().replace("[", " ").replace("]", " ")
            + "}"
        )
        comment_file_text += comment_definition(
            macro_name, macro_family, comment_body
        )
        current_comment_number += 1
        k.drop_tag()
        print("I convert it to this:", html.tostring(k))
        numcomrefsrepd += 1
    if prevcomrefsrepd == numcomrefsrepd:
        # serialize to str: concatenating the default bytes result with str
        # raises TypeError
        print(
            "Warning: I can't find the highlighted text for the comment:\n\n"
            + html.tostring(thiscommentreference, encoding="unicode")
            + "so I'm dropping it"
        )
    numcomrefs += 1
print(
    "I found %d comment references and replaced %d"
    % (numcomrefs, numcomrefsrepd)
)
335
def _wrap_in_math_mode(element, template, transliterate=False):
    """Replace *element* by its text wrapped in math-mode placeholders.

    The text content of *element* is substituted into *template* (a
    ``%``-format string with one ``%s``), the element's children are removed,
    and the tag itself is dropped so that only the text stays in the tree.
    Elements without any text content only lose their children.

    Parameters
    ----------
    element : element
        The element to convert.
    template : str
        Format string producing the marked-up text.
    transliterate : bool
        If true, the text and the tail are passed through ``unidecode``.
    """
    if element.getparent() is None:
        # already removed from the tree as a child of an element that was
        # converted earlier; its text is part of that element's output
        return
    body = element.text_content()
    if transliterate:
        body = str(unidecode(body))
    # {{{ remove children
    for child in list(element):
        child.drop_tree()
    # }}}
    if not body:
        return
    tail = element.tail or ""
    if transliterate:
        tail = unidecode(tail)
    # move the tail into the text so that it follows the closing marker
    element.tail = ""
    element.text = template % body + tail
    element.drop_tag()


if manual_math_conversion:
    for j in doc.xpath("//sub"):
        _wrap_in_math_mode(j, "%%ENTERMATHMODE%%_{%s}%%LEAVEMATHMODE%%")
    for j in doc.xpath("//sup"):
        # the text is kept as str; encoding it to bytes here put a literal
        # "b'...'" into the output
        _wrap_in_math_mode(j, "%%ENTERMATHMODE%%^{%s}%%LEAVEMATHMODE%%")
    for mathmodefontsize in [7, 8, 12, 81, 121]:
        for mathmodefonttype in ["cmmi", "cmr", "cmsy"]:
            for j in doc.find_class(
                "%s-%d" % (mathmodefonttype, mathmodefontsize)
            ):  # find the math-mode stuff
                _wrap_in_math_mode(
                    j,
                    "%%ENTERMATHMODE%%%s%%LEAVEMATHMODE%%",
                    transliterate=True,
                )
391
# Latin character typed in the Symbol font -> LaTeX replacement
symbol_lookup = {
    "x": "\\xi ",
    "p": "\\pi",
    "k": "\\kappa",
    "s": "\\sigma",
    "y": "\\psi",
    "h": "\\eta",
    "N": "\\Nu",
    "n": "\\nu",
    "e": "\\epsilon",
    "o": "\\omicron",
    "r": "\\rho",
    " ": " ",
    "_": "_",
    "{": "{",
    "}": "}",
}


def _symbol_font_to_latex(text):
    """Translate text typed in the Symbol font into LaTeX.

    A backslash and the characters after it, up to (not including) the next
    space, backslash or ``{``, are copied unchanged.  Every other character
    is translated through ``symbol_lookup``.

    Raises
    ------
    ValueError
        If a character has no entry in ``symbol_lookup``.
    """
    pieces = []
    position = 0
    while position < len(text):
        character = text[position]
        if character == "\\":
            # gobble up commands; the bounds check lets a command end the
            # string without running off the end
            pieces.append(character)
            position += 1
            while position < len(text) and text[position] not in (
                " ",
                "\\",
                "{",
            ):
                pieces.append(text[position])
                position += 1
            continue
        try:
            pieces.append(symbol_lookup[character])
        except KeyError:
            raise ValueError(
                "symbol for symbol font '%s' not found! Open the script and put it in the symbol_lookup dictionary"
                % character
            ) from None
        position += 1
    return "".join(pieces)


for j in doc.find_class("GramE"):
    j.drop_tag()
# NOTE(review): an earlier pass in this script already drops every tag whose
# style mentions font-family:Symbol, so this loop presumably finds nothing --
# confirm which of the two passes is intended.
for j in doc.xpath('//*[contains(@style,"font-family:Symbol")]'):
    # a missing text/tail is None; str(None) would insert the word "None"
    thistail = j.tail or ""
    thistext = j.text or ""
    j.tail = ""
    j.text = (
        "%ENTERMATHMODE%"
        + _symbol_font_to_latex(thistext)
        + "%LEAVEMATHMODE%"
        + thistail
    )
    j.drop_tag()
441
+ # print lsafen(map(html.tostring,newlist),wrap = 60)
442
newfile = re.sub(r"(.*)(\.htm.*)", r"\1_texcomm\2", sys.argv[1])
if newfile == sys.argv[1]:
    # the substitution only changes names containing ".htm"; writing to an
    # unchanged name would destroy the input
    raise ValueError(
        "the input file name %r does not contain '.htm', so the output"
        " would overwrite the input" % sys.argv[1]
    )
# html.tostring returns bytes by default; decode so that the str searches and
# the character loop below work on text.  ASCII is presumably lxml's default
# output encoding here (non-ASCII as character references) -- TODO confirm.
content = html.tostring(doc).decode("ascii")
# content = content.replace('$$','')
# NOTE(review): lxml presumably writes attribute values in double quotes, in
# which case this unquoted pattern can never match -- confirm before relying
# on this check.
for mathmodefonttype in ["cmmi", "cmr", "cmsy"]:
    if content.find("class=%s-" % mathmodefonttype) > 0:
        snippet_start = content.find("%s-" % mathmodefonttype)
        raise ValueError(
            "error, I see a string '%s' which indicates math mode, but apparently you're not searching for the correct font size, so go add the font into the list of math mode font sizes"
            % content[snippet_start : snippet_start + 14]
        )
# turn every unescaped "$" into alternating enter/leave placeholders
pieces = []
inmathmode = False
previous_character = ""  # nothing precedes the first character
for character in content:
    if character == "$" and previous_character != "\\":
        pieces.append("%LEAVEMATHMODE%" if inmathmode else "%ENTERMATHMODE%")
        inmathmode = not inmathmode
    else:
        pieces.append(character)
    previous_character = character
content = "".join(pieces)
# opened only after the checks above, so a failed run does not truncate or
# leak the output file; it is written and closed at the end of the script
fp = open(newfile, "w")
469
+
470
+
471
+ # content = content.replace('%ENTERMATHMODE%','$')
472
+ # content = content.replace('%LEAVEMATHMODE%','$')
473
def decodemathmode(arg):
    """Convert math-mode placeholder markers in *arg* into ``$`` delimiters.

    Adjacent math regions that are separated only by digits, parentheses,
    commas or periods are merged first, as are directly adjacent ``_{..}``
    and ``^{..}`` groups.  Then every outermost
    ``%ENTERMATHMODE%`` ... ``%LEAVEMATHMODE%`` pair is replaced by a pair of
    ``$`` characters, and markers nested inside another math region are
    removed.

    Parameters
    ----------
    arg : str
        Text containing the placeholder markers.

    Returns
    -------
    str
        The text with ``$`` delimiters.  A ``%LEAVEMATHMODE%`` without a
        matching opener is dropped; an opener that is never closed still
        produces its opening ``$``.
    """
    for _ in range(20):
        # repeat the merges until nothing changes (at most 20 passes)
        merged = re.sub(
            r"([(),\.0-9]*)%LEAVEMATHMODE%([(),\.0-9]*)%ENTERMATHMODE%([(),\.0-9]*)",
            r"\1\2\3",
            arg,
        )
        merged = re.sub(r"_{([^}]*)}_{([^}]*)}", r"_{\1\2}", merged)
        merged = re.sub(r"\^{([^}]*)}\^{([^}]*)}", r"^{\1\2}", merged)
        if merged == arg:
            break
        arg = merged
    # Walk the markers once while tracking the nesting depth.  Comparing
    # str.find() results directly treats "not found" (-1) as a position,
    # which skips a marker at index 0 and deletes the last closing marker.
    pieces = []
    depth = 0
    copied_up_to = 0
    for marker in re.finditer(r"%(ENTER|LEAVE)MATHMODE%", arg):
        pieces.append(arg[copied_up_to : marker.start()])
        copied_up_to = marker.end()
        if marker.group(1) == "ENTER":
            if depth == 0:
                pieces.append("$")
            depth += 1
        elif depth > 0:
            depth -= 1
            if depth == 0:
                pieces.append("$")  # close this math environment
    pieces.append(arg[copied_up_to:])
    return "".join(pieces)
507
+
508
+
509
content = decodemathmode(content)
# fp is the converted-HTML output file opened earlier in the script
try:
    fp.write(content)
finally:
    fp.close()
# plain-text version of the converted document; the document is parsed from
# the string just written instead of being read back from disk
doc = html.fromstring(content)
textfile = re.sub(r"(.*)(\.htm.*)", r"\1.txt", newfile)
with open(textfile, "w") as fp:
    fp.write(unidecode(doc.text_content()))
# LaTeX definitions of the comment macros
textfile = re.sub(r"(.*)(\.htm.*)", r"\1_comments.tex", newfile)
with open(textfile, "w") as fp:
    # the file is opened in text mode, so write str; writing the
    # utf-8-encoded bytes raises TypeError
    fp.write(decodemathmode(comment_file_text))