myers_diff 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/myers_diff/char_diff.rb +181 -0
- data/lib/myers_diff/version.rb +3 -0
- data/lib/myers_diff.rb +1 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 30beabf5813fa2a01a7b2d599b5041afb61c8ec6a1d91c0298cfb9bde2c9a9eb
|
4
|
+
data.tar.gz: a5eaeef03046a444bfa566629b9cf38f4f4f97604423dc7cbb509ad35c27f616
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 534e00e7e71ffa1a878c4f885af92fed85758ad927dde76f8ac1e89b4e4a2b5534bab2cff073df6ec23ced8f16a6ebe777a6e3efe8664e3e282c151aeefabb0b
|
7
|
+
data.tar.gz: 5808a27d91717e5b8fb280f35d23d92afbd1d6be8ac854feb308ac0e33a818119c2bd60e8cc540bd475cf02c80f317a848f0b14e1bdbaa682fde73eeeeeae212
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module MyersDiff
  # Character-level diff between two strings.
  #
  # The search walks "diagonals" of the edit graph, keeping for each diagonal
  # the path that has advanced furthest into the new string, and stops as soon
  # as one path reaches the end of both inputs.
  #
  # A path is a Hash `{ new_pos: Integer, components: Array<Hash> }` where
  # `new_pos` is the index of the last consumed token of the new string
  # (-1 when nothing has been consumed yet). A component is a Hash with
  # `:count` and, for edits, `:added` / `:removed` flags; `build_values`
  # later fills in `:value`.
  class CharDiff
    # Computes the list of change components that turn `s1` into `s2`.
    #
    # @param s1 [#to_s] the old text
    # @param s2 [#to_s] the new text
    # @param options [Hash] accepted for forward compatibility; not read here
    # @return [Array<Hash>] components in output order. Unchanged runs carry
    #   `:count` and `:value`; edits additionally carry `added: true` or
    #   `removed: true`. When both inputs are identical a single
    #   `{ value:, count: }` component is returned.
    # @raise [RuntimeError] if no edit script is found within
    #   `old length + new length` edits (not expected to happen)
    def diff(s1, s2, **options)
      old_tokens = remove_empty(tokenize(cast_input(s1)))
      new_tokens = remove_empty(tokenize(cast_input(s2)))

      new_len = new_tokens.size
      old_len = old_tokens.size
      max_edit_length = new_len + old_len

      # Seed diagonal 0 with an empty path and consume the common prefix.
      best_path = { 0 => { new_pos: -1, components: [] } }
      old_pos = extract_common(best_path[0], new_tokens, old_tokens, 0)
      if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        # The common prefix already covers both inputs: they are identical.
        return [{ value: join(new_tokens), count: new_tokens.size }]
      end

      (1..max_edit_length).each do |edit_length|
        result = advance_paths(best_path, edit_length, new_tokens, old_tokens)
        return result if result
      end

      # Previously this fell through and returned the String 'death', which no
      # caller expecting an Array could handle; fail loudly instead.
      raise 'MyersDiff::CharDiff#diff: no edit script found within the maximum edit length'
    end

    # Appends one added/removed token to `components`, merging it into the
    # last component when that component has the same added/removed flags.
    #
    # @param components [Array<Hash>] component list of a path (modified in place)
    # @param added [true, nil]
    # @param removed [true, nil]
    def push_component(components, added, removed)
      last = components.last
      if last && last[:added] == added && last[:removed] == removed
        # Replace the hash instead of mutating it: clone_path only copies the
        # array, so the hash itself may be shared with another path.
        components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
      else
        components.push(count: 1, added: added, removed: removed)
      end
    end

    # Advances `base_path` along `diagonal_path` for as long as the next old
    # and new tokens are equal, recording the run as a common component.
    #
    # @param base_path [Hash] `{ new_pos: Integer, components: Array }` (modified in place)
    # @param new_string [Array<String>] tokenized new text
    # @param old_string [Array<String>] tokenized old text
    # @param diagonal_path [Integer] diagonal index (`new_pos - old_pos`)
    # @return [Integer] index of the last consumed token of the old text
    def extract_common(base_path, new_string, old_string, diagonal_path)
      new_len = new_string.size
      old_len = old_string.size
      new_pos = base_path[:new_pos]
      old_pos = new_pos - diagonal_path
      start_pos = new_pos

      while new_pos + 1 < new_len && old_pos + 1 < old_len &&
            equals(new_string[new_pos + 1], old_string[old_pos + 1])
        new_pos += 1
        old_pos += 1
      end

      common_count = new_pos - start_pos
      base_path[:components].push(count: common_count) if common_count > 0

      base_path[:new_pos] = new_pos
      old_pos
    end

    # Token comparison used by the diff.
    # TODO: support custom comparator
    # TODO: support case-insensitive
    def equals(l, r)
      l == r
    end

    # Drops nil entries from a token array.
    def remove_empty(array)
      array.compact
    end

    # Normalizes an input value before tokenizing. Strings pass through
    # unchanged; anything else (including nil) is converted with `to_s` so
    # that `tokenize` does not fail on it.
    def cast_input(str)
      str.to_s
    end

    # Splits a string into an array of single-character strings.
    def tokenize(str)
      str.split('')
    end

    # Inverse of `tokenize`: concatenates tokens back into one string.
    def join(chars)
      chars.join('')
    end

    # Fills in `:value` for every component and reorders adjacent
    # added/removed pairs so that the removal comes first.
    #
    # @param components [Array<Hash>] component list of the winning path (modified in place)
    # @param new_string [Array<String>] tokenized new text
    # @param old_string [Array<String>] tokenized old text
    # @param use_longest_token [Boolean] for unchanged runs, pick the longer
    #   of the old and new token at each position
    # @return [Array<Hash>] the same `components` array
    def build_values(components, new_string, old_string, use_longest_token = true)
      new_pos = 0
      old_pos = 0

      components.each_index do |pos|
        component = components[pos]
        count = component[:count]

        if component[:removed]
          component[:value] = join(old_string[old_pos, count])
          old_pos += count

          # The search emits "add then remove"; swap so removals are listed first.
          if pos > 0 && components[pos - 1][:added]
            components[pos - 1], components[pos] = components[pos], components[pos - 1]
          end
        else
          tokens = new_string[new_pos, count]
          if !component[:added] && use_longest_token
            tokens = tokens.each_with_index.map do |token, offset|
              old_token = old_string[old_pos + offset]
              old_token.size > token.size ? old_token : token
            end
          end

          component[:value] = join(tokens)
          new_pos += count
          old_pos += count unless component[:added]
        end
      end

      # If the final edit has an empty value, fold it into the previous
      # component and drop it.
      last_component = components.last
      if components.size > 1 &&
         last_component[:value].is_a?(String) &&
         (last_component[:added] || last_component[:removed]) &&
         equals('', last_component[:value])
        components[-2][:value] += last_component[:value]
        components.pop
      end

      components
    end

    # Copies a path. Only the components array is duplicated; the component
    # hashes inside it are shared with the original.
    def clone_path(path_hash)
      { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
    end

    private

    # Runs one round of the search: extends every reachable diagonal by one
    # edit (an addition or a removal) and then follows the common run.
    #
    # @param best_path [Hash{Integer=>Hash,nil}] furthest path per diagonal (modified in place)
    # @param edit_length [Integer] number of edits allowed in this round
    # @param new_tokens [Array<String>]
    # @param old_tokens [Array<String>]
    # @return [Array<Hash>, nil] the finished components when a path reached
    #   the end of both inputs, otherwise nil
    def advance_paths(best_path, edit_length, new_tokens, old_tokens)
      new_len = new_tokens.size
      old_len = old_tokens.size

      (-edit_length).step(edit_length, 2) do |diagonal|
        add_path = best_path[diagonal - 1]
        remove_path = best_path[diagonal + 1]
        old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal
        # Nothing else reads this slot again in this round, so release it;
        # this is what makes reusing add_path without cloning safe below.
        best_path[diagonal - 1] = nil if add_path

        can_add = add_path && add_path[:new_pos] + 1 < new_len
        can_remove = remove_path && old_pos >= 0 && old_pos < old_len
        unless can_add || can_remove
          # Dead end on this diagonal.
          best_path[diagonal] = nil
          next
        end

        # Branch from whichever neighbour has advanced furthest in the new text.
        base_path =
          if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
            cloned = clone_path(remove_path)
            push_component(cloned[:components], nil, true)
            cloned
          else
            add_path[:new_pos] += 1
            push_component(add_path[:components], true, nil)
            add_path
          end

        old_pos = extract_common(base_path, new_tokens, old_tokens, diagonal)

        if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
          return build_values(base_path[:components], new_tokens, old_tokens)
        end

        best_path[diagonal] = base_path
      end

      nil
    end
  end
end
|
data/lib/myers_diff.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'myers_diff/char_diff'
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: myers_diff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Tsui
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-05-22 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Implementation of Myers 1986 text diff algorithm that started as a port
|
14
|
+
of the jsdiff project with plans to branch out to human-friendly diffs
|
15
|
+
email: alextsui@pm.me
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/myers_diff.rb
|
21
|
+
- lib/myers_diff/char_diff.rb
|
22
|
+
- lib/myers_diff/version.rb
|
23
|
+
homepage: https://github.com/alextsui05/myers_diff
|
24
|
+
licenses:
|
25
|
+
- MIT
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubygems_version: 3.0.8
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Implementation of Myers 1986 text diff algorithm
|
46
|
+
test_files: []
|