pyDiffTools 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,235 @@
1
+ from difflib import SequenceMatcher
2
+
3
+
4
def _parse_whitespace(s):
    """Split *s* into alternating runs of whitespace and non-whitespace.

    Whitespace characters are space, tab, carriage return and newline.

    Returns ``(tokens, is_whitespace)``: two lists of equal length, where
    ``is_whitespace[k]`` tells whether ``tokens[k]`` should be treated as
    plain whitespace.  Two quirks of the tokenizer are deliberate:

    * whitespace at the very start of *s* is glued onto the first word
      (no token has been emitted yet, so there is nothing to separate);
    * a whitespace run in the interior of the text that contains more than
      one newline (a blank line) is flagged as *not* whitespace, so that
      paragraph breaks take part in the word-level diff.

    The final token is flagged purely by whether it is a whitespace run;
    the blank-line rule is not applied to it.
    """
    tokens = []
    is_whitespace = []
    current = ""
    in_whitespace = True
    for ch in s:
        if ch in (" ", "\t", "\r", "\n"):
            if not in_whitespace:
                # a word just ended: emit it and start a whitespace run
                tokens.append(current)
                is_whitespace.append(False)
                current = ch
            else:
                current += ch
            in_whitespace = True
        else:
            if in_whitespace and tokens:
                # a whitespace run just ended; a run with two or more
                # newlines is a paragraph break, not "whitespace"
                tokens.append(current)
                is_whitespace.append(current.count("\n") <= 1)
                current = ch
            else:
                current += ch
            in_whitespace = False
    tokens.append(current)
    is_whitespace.append(in_whitespace)
    return tokens, is_whitespace


def _generate_word_lists(input_tokens, input_iswhitespace):
    """Pair every word with the whitespace that follows it.

    Returns three parallel lists ``(words, whitespace, isdoublenewline)``.
    A whitespace token with no word in front of it is paired with an empty
    word; a word with no whitespace token after it is paired with empty
    whitespace.  ``isdoublenewline[k]`` is true when ``words[k]`` contains
    more than one newline.
    """
    words = []
    whitespace = []
    isdoublenewline = []
    last = len(input_tokens) - 1
    j = 0
    while j < len(input_tokens):
        if input_iswhitespace[j]:
            # make it so the whitespace always comes "after" a (blank) word
            words.append("")
            whitespace.append(input_tokens[j])
            j += 1
        elif j == last or not input_iswhitespace[j + 1]:
            # last token, or a word followed by a paragraph-break token:
            # there is no trailing whitespace to pair it with.  Advancing
            # j here is what lets the loop finish on a trailing word.
            words.append(input_tokens[j])
            whitespace.append("")
            j += 1
        else:
            words.append(input_tokens[j])
            whitespace.append(input_tokens[j + 1])
            j += 2
        # exactly one flag per word keeps the three lists aligned
        isdoublenewline.append(words[-1].count("\n") > 1)
    return words, whitespace, isdoublenewline


def _emit(pieces, words, whitespace, last_indent):
    """Append each word plus its whitespace to *pieces*.

    Returns the updated ``last_indent``: whatever followed the first
    newline in the most recent whitespace that contained one.
    """
    for word, space in zip(words, whitespace):
        pieces.append(word + space)
        idx = space.find("\n")
        if idx > -1:
            last_indent = space[idx + 1 :]
    return last_indent


def _restore_newlines(whitespace, newver_whitespace, last_indent, newline_debt):
    """Turn spaces back into line breaks where the new text had a newline.

    Mutates *whitespace* in place and returns the remaining newline debt;
    stops as soon as the debt drops below one.
    """
    for k, new_ws in enumerate(newver_whitespace):
        if new_ws.count("\n") > 0:
            whitespace[k] = "\n" + last_indent
            # shouldn't be more than one but doesn't hurt
            newline_debt -= whitespace[k].count("\n")
        if newline_debt < 1:
            break
    return newline_debt


def run(arguments):
    """Rewrite the second file using the whitespace layout of the first.

    ``arguments[0]`` and ``arguments[1]`` are paths to UTF-8 text files.
    Both are split into words, the word sequences are diffed with
    ``difflib.SequenceMatcher``, and ``arguments[1]`` is overwritten with
    the words of the second file: matching stretches reuse the first
    file's whitespace, while changed stretches get single spaces, with
    line breaks re-inserted to make up for newlines lost from the first
    file (tracked as "newline debt").

    No-break spaces (U+00A0) and three-per-em spaces (U+2004) in the
    second file are converted to ordinary spaces first.  Diagnostic
    output for every replaced stretch is printed to stdout.

    Raises:
        ValueError: if the matcher yields an opcode other than
            equal/delete/replace/insert.
    """
    with open(arguments[0], encoding="utf-8") as fp:
        text1 = fp.read()
    with open(arguments[1], encoding="utf-8") as fp:
        text2 = fp.read()
    text2 = text2.replace("\u00a0", " ")  # unicode no break space
    text2 = text2.replace("\u2004", " ")  # three-per-em space

    tokens, iswhitespace = _parse_whitespace(text1)
    text1_words, text1_whitespace, text1_isdoublenewline = (
        _generate_word_lists(tokens, iswhitespace)
    )
    tokens, iswhitespace = _parse_whitespace(text2)
    text2_words, text2_whitespace, text2_isdoublenewline = (
        _generate_word_lists(tokens, iswhitespace)
    )

    diffs = SequenceMatcher(None, text1_words, text2_words).get_opcodes()
    pieces = []  # output fragments, joined once at the end
    newline_debt = 0
    last_indent = ""
    for tag, i1, i2, j1, j2 in diffs:
        if tag == "equal":
            last_indent = _emit(
                pieces, text1_words[i1:i2], text1_whitespace[i1:i2], last_indent
            )
            if i2 - i1 > 4:
                # if five or more words have matched, forgive my newline debt
                newline_debt = 0
        elif tag == "delete":
            if any("\n" in thisstr for thisstr in text1_whitespace[i1:i2]):
                newline_debt += 1
        elif tag == "replace":
            oldver_words = text1_words[i1:i2]
            oldver_whitespace = text1_whitespace[i1:i2]
            oldver_isdoublenewline = text1_isdoublenewline[i1:i2]
            newver_whitespace = text2_whitespace[j1:j2]
            print("newline debt", newline_debt)
            newline_debt += sum(
                thisstr.count("\n") for thisstr in oldver_whitespace
            )
            print(
                "about to replace",
                repr(oldver_words).encode("unicode-escape"),
            )
            print(
                "   with",
                repr(text2_words[j1:j2]).encode("unicode-escape"),
            )
            print(
                "   whitepace from ",
                repr(oldver_whitespace).encode("unicode-escape"),
            )
            print(
                "   whitepace to ",
                repr(newver_whitespace).encode("unicode-escape"),
            )
            print("   newline debt", newline_debt)
            temp_addition = text2_words[j1:j2]
            # {{{ if I am adding any double newlines, use the original
            #     version: each new paragraph break is swapped for the next
            #     not-yet-used paragraph break of the old stretch
            tstdbl_j = 0
            for tstdbl_i, new_is_dbl in enumerate(text2_isdoublenewline[j1:j2]):
                if not new_is_dbl:
                    continue
                matched = False
                while tstdbl_j < len(oldver_isdoublenewline) and not matched:
                    if oldver_isdoublenewline[tstdbl_j]:
                        temp_addition[tstdbl_i] = oldver_words[tstdbl_j]
                        matched = True
                    tstdbl_j += 1
            # }}}
            # sometimes, the "whitespace" can be nothing
            whitespace = [" " if len(x) > 0 else "" for x in newver_whitespace]
            if newline_debt > 0:
                newline_debt = _restore_newlines(
                    whitespace, newver_whitespace, last_indent, newline_debt
                )
                # if I can't make up for the whitespace with the new text,
                # put it where it went in the old text
                for k in range(min(len(oldver_whitespace), len(whitespace))):
                    if oldver_whitespace[k].count("\n") > 0:
                        whitespace[k] = oldver_whitespace[k]
                        # shouldn't be more than one but doesn't hurt
                        newline_debt -= whitespace[k].count("\n")
                    if newline_debt < 1:
                        break
            print("   whitepace became", repr(whitespace))
            last_indent = _emit(pieces, temp_addition, whitespace, last_indent)
        elif tag == "insert":
            temp_addition = text2_words[j1:j2]
            newver_whitespace = text2_whitespace[j1:j2]
            # sometimes, the "whitespace" can be nothing
            whitespace = [" " if len(x) > 0 else "" for x in newver_whitespace]
            if newline_debt > 0:
                newline_debt = _restore_newlines(
                    whitespace, newver_whitespace, last_indent, newline_debt
                )
            last_indent = _emit(pieces, temp_addition, whitespace, last_indent)
        else:
            raise ValueError("unknown opcode" + tag)
    with open(arguments[1], "w", encoding="utf-8") as fp:
        fp.write("".join(pieces))
File without changes