pyDiffTools 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydifftools/__init__.py +11 -0
- pydifftools/check_numbers.py +70 -0
- pydifftools/command_line.py +747 -0
- pydifftools/command_registry.py +65 -0
- pydifftools/comment_functions.py +39 -0
- pydifftools/continuous.py +194 -0
- pydifftools/copy_files.py +75 -0
- pydifftools/diff-doc.js +193 -0
- pydifftools/doc_contents.py +147 -0
- pydifftools/flowchart/__init__.py +15 -0
- pydifftools/flowchart/dot_to_yaml.py +114 -0
- pydifftools/flowchart/graph.py +620 -0
- pydifftools/flowchart/watch_graph.py +168 -0
- pydifftools/html_comments.py +33 -0
- pydifftools/html_uncomments.py +524 -0
- pydifftools/match_spaces.py +235 -0
- pydifftools/notebook/__init__.py +0 -0
- pydifftools/notebook/fast_build.py +1502 -0
- pydifftools/notebook/tex_to_qmd.py +319 -0
- pydifftools/onewordify.py +149 -0
- pydifftools/onewordify_undo.py +54 -0
- pydifftools/outline.py +173 -0
- pydifftools/rearrange_tex.py +188 -0
- pydifftools/searchacro.py +80 -0
- pydifftools/separate_comments.py +73 -0
- pydifftools/split_conflict.py +213 -0
- pydifftools/unseparate_comments.py +69 -0
- pydifftools/update_check.py +31 -0
- pydifftools/wrap_sentences.py +501 -0
- pydifftools/xml2xlsx.vbs +33 -0
- pydifftools-0.1.8.dist-info/METADATA +146 -0
- pydifftools-0.1.8.dist-info/RECORD +36 -0
- pydifftools-0.1.8.dist-info/WHEEL +5 -0
- pydifftools-0.1.8.dist-info/entry_points.txt +2 -0
- pydifftools-0.1.8.dist-info/licenses/LICENSE.md +28 -0
- pydifftools-0.1.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
from difflib import SequenceMatcher
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Characters that separate words; a run of them forms one whitespace token.
_WHITESPACE_CHARS = (" ", "\t", "\r", "\n")


def _parse_whitespace(s):
    """Split *s* into alternating word and whitespace tokens.

    Returns ``(tokens, white_or_not)``: two lists of equal length where
    ``white_or_not[i]`` is True when ``tokens[i]`` is ordinary whitespace.

    Two special cases follow from the scanning rules below:

    * whitespace at the very start of *s* is not emitted on its own; it is
      glued onto the front of the first word token;
    * an interior whitespace run containing more than one newline is flagged
      as *not* whitespace, so that it is later diffed like a word.
    """
    retval = []
    white_or_not = []
    current_string = ""
    is_whitespace = True
    for char in s:
        if char in _WHITESPACE_CHARS:
            if not is_whitespace:
                # switched from a word to whitespace: flush the word
                retval.append(current_string)
                white_or_not.append(False)
                current_string = char
            else:
                current_string += char
            is_whitespace = True
        else:
            if is_whitespace and len(retval) > 0:
                # switched from whitespace to a word: flush the whitespace
                retval.append(current_string)
                # double newline is not "whitespace"
                white_or_not.append(current_string.count("\n") <= 1)
                current_string = char
            else:
                current_string += char
            is_whitespace = False
    # Flush whatever is left.  The previous code appended a second flag for
    # this final token, leaving the flag list one entry longer than the token
    # list; only the first flag was ever read, so that is the one kept here
    # (a trailing whitespace run stays "whitespace" even with 2+ newlines).
    retval.append(current_string)
    white_or_not.append(is_whitespace)
    return retval, white_or_not


def _generate_word_lists(input_tokens, input_iswhitespace):
    """Pair every word with the whitespace that follows it.

    Returns three parallel lists ``(words, whitespace, isdoublenewline)``.
    ``whitespace[i]`` is the whitespace token directly after ``words[i]``
    (``""`` if there is none) and ``isdoublenewline[i]`` is True when
    ``words[i]`` contains more than one newline.  The final token, when it
    is a word, is always recorded with ``isdoublenewline`` False.
    """
    retval_words = []
    retval_whitespace = []
    retval_isdoublenewline = []
    n_tokens = len(input_tokens)
    j = 0
    # go through and add whitespace and words, always in pairs
    while j < n_tokens:
        if input_iswhitespace[j]:
            # make it so the whitespace always comes "after" the word
            retval_words.append("")
            retval_whitespace.append(input_tokens[j])
            # keep the three lists the same length
            retval_isdoublenewline.append(False)
            j += 1
        elif j == n_tokens - 1:
            # this is the last one, so just add it
            retval_words.append(input_tokens[j])
            retval_whitespace.append("")
            retval_isdoublenewline.append(False)
            # without this increment the loop never terminated for input
            # that does not end in whitespace
            j += 1
        else:  # it's a word
            retval_words.append(input_tokens[j])
            if input_iswhitespace[j + 1]:
                retval_whitespace.append(input_tokens[j + 1])
                j += 2
            else:
                # this can happen if it's a newline combo or followed by a
                # newline combo
                retval_whitespace.append("")
                j += 1
            retval_isdoublenewline.append(retval_words[-1].count("\n") > 1)
    return retval_words, retval_whitespace, retval_isdoublenewline


def _spend_debt_on_new_breaks(
    whitespace, newver_whitespace, last_indent, newline_debt
):
    """Turn line breaks of the new text into real breaks while debt remains.

    For each slot where *newver_whitespace* contains a newline,
    ``whitespace[k]`` (modified in place) becomes ``"\\n" + last_indent`` and
    the debt is reduced by the newlines written.  Stops once the debt drops
    below one.  Returns the updated debt.
    """
    if newline_debt > 0:
        for k, gap in enumerate(newver_whitespace):
            if gap.count("\n") > 0:
                whitespace[k] = "\n" + last_indent
                # shouldn't be more than one but doesn't hurt
                newline_debt -= whitespace[k].count("\n")
            if newline_debt < 1:
                break
    return newline_debt


def _emit(pieces, words, whitespace, last_indent):
    """Append each word plus its whitespace to *pieces*.

    Returns the updated ``last_indent``: whatever follows the first newline
    of the most recent whitespace entry that contained one.
    """
    for word, gap in zip(words, whitespace):
        pieces.append(word + gap)
        newline_at = gap.find("\n")
        if newline_at > -1:
            last_indent = gap[newline_at + 1 :]
    return last_indent


def run(arguments):
    """Rewrite ``arguments[1]`` using the whitespace layout of ``arguments[0]``.

    Both files are read as UTF-8 and compared word by word with
    ``difflib.SequenceMatcher``.  Words that match are written with the
    whitespace of the first file.  Words that only exist in the second file
    are written separated by single spaces, except that line breaks lost from
    the first file (tracked as ``newline_debt``) are re-inserted.  The result
    overwrites ``arguments[1]``; ``arguments[0]`` is only read.

    Diagnostic information about every replaced span is printed to stdout.

    Parameters
    ----------
    arguments : sequence of str
        ``arguments[0]`` is the path of the file whose whitespace is kept,
        ``arguments[1]`` the path of the file that is rewritten in place.

    Raises
    ------
    ValueError
        If the matcher yields an opcode other than equal, delete, replace or
        insert.
    """
    with open(arguments[0], encoding="utf-8") as fp:
        text1 = fp.read()
    with open(arguments[1], encoding="utf-8") as fp:
        text2 = fp.read()
    text2 = text2.replace("\u00a0", " ")  # unicode no break space
    text2 = text2.replace("\u2004", " ")  # three-per-em space

    tokens, iswhitespace = _parse_whitespace(text1)
    text1_words, text1_whitespace, text1_isdoublenewline = (
        _generate_word_lists(tokens, iswhitespace)
    )
    tokens, iswhitespace = _parse_whitespace(text2)
    text2_words, text2_whitespace, text2_isdoublenewline = (
        _generate_word_lists(tokens, iswhitespace)
    )

    matcher = SequenceMatcher(None, text1_words, text2_words)
    pieces = []  # output fragments, joined once at the end
    newline_debt = 0  # line breaks of text1 not yet reproduced in the output
    last_indent = ""  # text after the newline of the latest line break
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            last_indent = _emit(
                pieces, text1_words[i1:i2], text1_whitespace[i1:i2], last_indent
            )
            if i2 - i1 > 4:
                # if five or more words have matched, forgive my newline debt
                newline_debt = 0
        elif tag == "delete":
            if any("\n" in gap for gap in text1_whitespace[i1:i2]):
                newline_debt += 1
        elif tag == "replace":
            oldver_whitespace = text1_whitespace[i1:i2]
            newver_whitespace = text2_whitespace[j1:j2]
            print("newline debt", newline_debt)
            newline_debt += sum(gap.count("\n") for gap in oldver_whitespace)
            print(
                "about to replace",
                repr(text1_words[i1:i2]).encode("unicode-escape"),
            )
            print(
                " with",
                repr(text2_words[j1:j2]).encode("unicode-escape"),
            )
            print(
                " whitepace from ",
                repr(oldver_whitespace).encode("unicode-escape"),
            )
            print(
                " whitepace to ",
                repr(newver_whitespace).encode("unicode-escape"),
            )
            print(" newline debt", newline_debt)
            temp_addition = text2_words[j1:j2]
            # {{{ check to see if I am adding any double newlines -- if I am
            #     use the original version
            old_words = text1_words[i1:i2]
            old_isdoublenewline = text1_isdoublenewline[i1:i2]
            old_pos = 0  # only moves forward: each old entry is used once
            for new_pos, is_double in enumerate(text2_isdoublenewline[j1:j2]):
                if not is_double:
                    continue
                matched = False
                while old_pos < len(old_isdoublenewline) and not matched:
                    if old_isdoublenewline[old_pos]:
                        temp_addition[new_pos] = old_words[old_pos]
                        matched = True
                    old_pos += 1
            # }}}
            # sometimes, the "whitespace" can be nothing
            whitespace = [" " if len(x) > 0 else "" for x in newver_whitespace]
            newline_debt = _spend_debt_on_new_breaks(
                whitespace, newver_whitespace, last_indent, newline_debt
            )
            # if I can't make up for the whitespace with the new text, put it
            # where it went in the old text
            # NOTE(review): this block was restored from a copy that had lost
            # its indentation; running this loop regardless of the debt, with
            # the debt check once per iteration, is a reconstruction --
            # confirm against the intended behavior.
            for k in range(min(len(oldver_whitespace), len(whitespace))):
                if oldver_whitespace[k].count("\n") > 0:
                    whitespace[k] = oldver_whitespace[k]
                    # shouldn't be more than one but doesn't hurt
                    newline_debt -= whitespace[k].count("\n")
                if newline_debt < 1:
                    break
            print(" whitepace became", repr(whitespace))
            last_indent = _emit(pieces, temp_addition, whitespace, last_indent)
        elif tag == "insert":
            newver_whitespace = text2_whitespace[j1:j2]
            # sometimes, the "whitespace" can be nothing
            whitespace = [" " if len(x) > 0 else "" for x in newver_whitespace]
            newline_debt = _spend_debt_on_new_breaks(
                whitespace, newver_whitespace, last_indent, newline_debt
            )
            last_indent = _emit(
                pieces, text2_words[j1:j2], whitespace, last_indent
            )
        else:
            raise ValueError("unknown opcode" + tag)
    with open(arguments[1], "w", encoding="utf-8") as fp:
        fp.write("".join(pieces))
|
|
File without changes
|