myers_diff 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/myers_diff.rb +1 -0
- data/lib/myers_diff/version.rb +1 -1
- data/lib/myers_diff/word_diff.rb +187 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38ecad0ebd0a04de796e852e6cfc77065daf0b0da1e6384beed39520b4279230
|
4
|
+
data.tar.gz: 285e2e66e2e25facf31c54b2c32d7d45d8bd2f695d7f789f04b84c579ef2abc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fa47b2e26e99fc72dbda6f16155848de64da9b59da6aa68cc8ecdfca127395b0887c8e4ecd3ecaa2a3acab2e937c7175aa50f35c4a6b8d39bef0c906286b2e3
|
7
|
+
data.tar.gz: eeb8d1d8db1bebda97e164932b4e878374945d1940f8515d2f0b308bd922361fff2e1ae035f5044fb43ea405a283109a407c4a9fc32c145b200526ff3afbe213
|
data/lib/myers_diff.rb
CHANGED
data/lib/myers_diff/version.rb
CHANGED
data/lib/myers_diff/word_diff.rb
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
module MyersDiff
  # Word-level text diff based on the Myers (1986) shortest-edit-script search.
  #
  # Inputs are split into whitespace-separated tokens, the token sequences are
  # compared, and the result is reported as an ordered list of runs. Each run
  # is a Hash with:
  #   :value   - the tokens of the run joined with a single space
  #   :count   - see NOTE(review) on #recount
  #   :added   - true when the run exists only in the new text
  #   :removed - true when the run exists only in the old text
  # Runs common to both texts carry neither :added nor :removed.
  class WordDiff
    # Computes the word diff that turns +s1+ into +s2+.
    #
    # @param s1 [String] the old text
    # @param s2 [String] the new text
    # @param options [Hash] accepted for interface compatibility; currently unused
    # @return [Array<Hash>] runs in output order; when a removal and an addition
    #   are adjacent the removal is listed first
    def diff(s1, s2, **options)
      old_tokens = remove_empty(tokenize(cast_input(s1)))
      new_tokens = remove_empty(tokenize(cast_input(s2)))

      new_len = new_tokens.size
      old_len = old_tokens.size
      max_edit_length = new_len + old_len

      # best_path maps a diagonal number to the furthest-reaching path found on
      # that diagonal so far: { new_pos: Integer, components: Array<Hash> }.
      best_path = { 0 => { new_pos: -1, components: [] } }

      # Consume the common prefix; if that already covers both inputs the
      # texts are token-for-token identical.
      old_pos = extract_common(best_path[0], new_tokens, old_tokens, 0)
      if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        return [{ value: join(new_tokens), count: new_tokens.size }]
      end

      (1..max_edit_length).each do |edit_length|
        # Only every second diagonal is reachable with exactly edit_length edits.
        (-edit_length).step(edit_length, 2) do |diagonal_path|
          add_path = best_path[diagonal_path - 1]
          remove_path = best_path[diagonal_path + 1]
          old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal_path

          # add_path may be extended in place below, so release its old slot.
          best_path[diagonal_path - 1] = nil if add_path

          can_add = add_path && add_path[:new_pos] + 1 < new_len
          can_remove = remove_path && old_pos >= 0 && old_pos < old_len
          unless can_add || can_remove
            # Nothing can reach this diagonal with this many edits.
            best_path[diagonal_path] = nil
            next
          end

          # Extend whichever neighbour has progressed further through the new
          # tokens. The removal branch works on a copy because remove_path is
          # still needed as the add_path of a later diagonal.
          base_path =
            if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
              removal = clone_path(remove_path)
              push_component(removal[:components], nil, true)
              removal
            else
              add_path[:new_pos] += 1
              push_component(add_path[:components], true, nil)
              add_path
            end

          old_pos = extract_common(base_path, new_tokens, old_tokens, diagonal_path)

          if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
            return build_values(base_path[:components], new_tokens, old_tokens)
          end

          best_path[diagonal_path] = base_path
        end
      end

      # Not expected to be reached: removing every old token and adding every
      # new one is a valid script of max_edit_length edits. The sentinel value
      # is kept unchanged for backward compatibility.
      'death'
    end

    # Appends one added/removed token to +components+, extending the last run
    # when it has the same added/removed flags.
    #
    # The last run is replaced by a new Hash rather than mutated, because
    # #clone_path copies the components array shallowly and run Hashes may be
    # shared between candidate paths.
    #
    # @param components [Array<Hash>] run list, modified in place
    # @param added [true, nil]
    # @param removed [true, nil]
    def push_component(components, added, removed)
      last = components.last
      if last && last[:added] == added && last[:removed] == removed
        components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
      else
        components.push(count: 1, added: added, removed: removed)
      end
    end

    # Advances +base_path+ along +diagonal_path+ for as long as the next old
    # and new tokens are equal, recording the matched stretch as a common run.
    #
    # @param base_path [Hash] { new_pos: Integer, components: Array }; its
    #   :new_pos and :components are updated in place
    # @param new_string [Array<String>] tokens of the new text
    # @param old_string [Array<String>] tokens of the old text
    # @param diagonal_path [Integer] new_pos - old_pos for this diagonal
    # @return [Integer] the old-text position reached
    def extract_common(base_path, new_string, old_string, diagonal_path)
      new_len = new_string.size
      old_len = old_string.size
      new_pos = base_path[:new_pos]
      old_pos = new_pos - diagonal_path
      common_count = 0

      while new_pos + 1 < new_len && old_pos + 1 < old_len &&
            equals(new_string[new_pos + 1], old_string[old_pos + 1])
        new_pos += 1
        old_pos += 1
        common_count += 1
      end

      base_path[:components].push(count: common_count) if common_count > 0

      base_path[:new_pos] = new_pos
      old_pos
    end

    # Token equality used by the diff.
    # TODO: support custom comparator
    # TODO: support case-insensitive
    def equals(l, r)
      l == r
    end

    # Drops nil and empty tokens from +array+, returning a new array.
    #
    # The previous implementation used Array#compact, which only removes nil
    # and therefore let empty-string tokens through.
    def remove_empty(array)
      array.reject { |token| token.nil? || (token.respond_to?(:empty?) && token.empty?) }
    end

    # Hook for normalising raw input before tokenizing; returns it unchanged.
    def cast_input(str)
      str
    end

    # Splits +str+ into tokens. String#split(' ') splits on runs of whitespace
    # and ignores leading whitespace.
    def tokenize(str)
      str.split(' ')
    end

    # Joins tokens back into text with a single space between them.
    def join(chars)
      chars.join(' ')
    end

    # Fills in :value for every run in +components+ and finalises the list.
    #
    # @param components [Array<Hash>] runs produced by the search; modified in
    #   place (and also returned)
    # @param new_string [Array<String>] tokenized new text
    # @param old_string [Array<String>] tokenized old text
    # @param use_longest_token [Boolean] for common runs, take the longer of
    #   the old/new token at each position
    # @return [Array<Hash>] the same +components+ array
    def build_values(components, new_string, old_string, use_longest_token = true)
      new_pos = 0
      old_pos = 0

      components.size.times do |component_pos|
        component = components[component_pos]
        count = component[:count]

        if component[:removed]
          component[:value] = join(old_string[old_pos, count])
          old_pos += count

          # Report a removal ahead of the addition directly preceding it.
          if component_pos > 0 && components[component_pos - 1][:added]
            components[component_pos - 1], components[component_pos] =
              components[component_pos], components[component_pos - 1]
          end
        else
          tokens = new_string[new_pos, count]
          if !component[:added] && use_longest_token
            tokens = tokens.each_with_index.map do |token, offset|
              old_token = old_string[old_pos + offset]
              old_token.size > token.size ? old_token : token
            end
          end

          component[:value] = join(tokens)
          new_pos += count
          old_pos += count unless component[:added]
        end
      end

      # Fold a trailing added/removed run whose value is the empty string into
      # the run before it.
      last_component = components.last
      if components.size > 1 &&
         last_component[:value].is_a?(String) &&
         (last_component[:added] || last_component[:removed]) &&
         equals('', last_component[:value])
        components[-2][:value] += last_component[:value]
        components.pop
      end

      recount(components)

      components
    end

    # Copies a path so it can be extended independently. The components array
    # is duplicated shallowly; the run Hashes inside it remain shared.
    def clone_path(path_hash)
      { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
    end

    # Sets each run's :count to the size of its :value.
    #
    # NOTE(review): :value is the joined String here, so :count becomes a
    # character length, whereas #diff reports a token count when the inputs
    # are identical. Left as-is to preserve current output — confirm which of
    # the two is intended.
    def recount(components)
      components.each { |component| component[:count] = component[:value].size }
    end
  end
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myers_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Tsui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: Implementation of Myers 1986 text diff
|
14
|
-
of the jsdiff project
|
13
|
+
description: Implementation of Myers 1986 text diff algorithm that started as a port
|
14
|
+
of the jsdiff project with plans to branch out to human-friendly diffs
|
15
15
|
email: alextsui@pm.me
|
16
16
|
executables: []
|
17
17
|
extensions: []
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- lib/myers_diff.rb
|
21
21
|
- lib/myers_diff/char_diff.rb
|
22
22
|
- lib/myers_diff/version.rb
|
23
|
+
- lib/myers_diff/word_diff.rb
|
23
24
|
homepage: https://github.com/alextsui05/myers_diff
|
24
25
|
licenses:
|
25
26
|
- MIT
|