myers_diff 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 30beabf5813fa2a01a7b2d599b5041afb61c8ec6a1d91c0298cfb9bde2c9a9eb
4
+ data.tar.gz: a5eaeef03046a444bfa566629b9cf38f4f4f97604423dc7cbb509ad35c27f616
5
+ SHA512:
6
+ metadata.gz: 534e00e7e71ffa1a878c4f885af92fed85758ad927dde76f8ac1e89b4e4a2b5534bab2cff073df6ec23ced8f16a6ebe777a6e3efe8664e3e282c151aeefabb0b
7
+ data.tar.gz: 5808a27d91717e5b8fb280f35d23d92afbd1d6be8ac854feb308ac0e33a818119c2bd60e8cc540bd475cf02c80f317a848f0b14e1bdbaa682fde73eeeeeae212
@@ -0,0 +1,181 @@
1
module MyersDiff
  # Character-level text diff using the Myers (1986) shortest-edit-script
  # search.
  #
  # The overridable steps (+cast_input+, +tokenize+, +remove_empty+, +equals+,
  # +join+) are kept as public instance methods so a subclass can change how
  # input is split and compared without touching the search itself.
  class CharDiff
    # Computes the edit script that turns +s1+ into +s2+.
    #
    # @param s1 [String, nil] the old text (+nil+ is treated as empty)
    # @param s2 [String, nil] the new text (+nil+ is treated as empty)
    # @param options [Hash] accepted for call-site compatibility; currently
    #   not read by this implementation
    # @return [Array<Hash>] when both inputs tokenize identically, a single
    #   <tt>{ value:, count: }</tt> hash; otherwise one hash per run with
    #   +:count+ and +:value+, where changed runs additionally carry
    #   +:added+ / +:removed+ (+true+ or +nil+). A removal that directly
    #   follows an addition is emitted before it.
    # @raise [RuntimeError] if no edit script is found within
    #   <tt>old.size + new.size</tt> edits (not expected to happen)
    def diff(s1, s2, **options)
      old_tokens = remove_empty(tokenize(cast_input(s1)))
      new_tokens = remove_empty(tokenize(cast_input(s2)))
      new_len = new_tokens.size
      old_len = old_tokens.size

      # Edit length 0: consume whatever prefix the two inputs share.
      seed = { new_pos: -1, components: [] }
      best_path = { 0 => seed }
      reached = extract_common(seed, new_tokens, old_tokens, 0)
      if end_reached?(seed[:new_pos], reached, new_len, old_len)
        return [{ value: join(new_tokens), count: new_len }]
      end

      # Try every edit length in turn; for each, visit every second diagonal
      # from -edit_length to +edit_length.
      1.upto(new_len + old_len) do |edit_length|
        (-edit_length).step(edit_length, 2) do |diagonal|
          path = branch_onto(best_path, diagonal, new_len, old_len)
          if path.nil?
            # Neither neighbour can be extended onto this diagonal: prune it.
            best_path.delete(diagonal)
            next
          end

          reached = extract_common(path, new_tokens, old_tokens, diagonal)
          if end_reached?(path[:new_pos], reached, new_len, old_len)
            return build_values(path[:components], new_tokens, old_tokens)
          end

          best_path[diagonal] = path
        end
      end

      # The search above is bounded by deleting everything and inserting
      # everything, so this line signals a broken invariant, not a result.
      raise "MyersDiff: no edit path found within #{new_len + old_len} edits"
    end

    # Records one more added or removed token at the end of +components+,
    # extending the last run when it has the same added/removed flags.
    #
    # @param components [Array<Hash>] run list of the path being extended
    # @param added [true, nil]
    # @param removed [true, nil]
    def push_component(components, added, removed)
      last = components.last
      if last && last[:added] == added && last[:removed] == removed
        # Replace instead of mutating: clone_path copies only the array, so
        # this hash may still be referenced from another candidate path.
        components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
      else
        components.push(count: 1, added: added, removed: removed)
      end
    end

    # Advances +base_path+ along +diagonal_path+ while the next tokens of
    # both inputs are equal, appending a <tt>{ count: n }</tt> run when at
    # least one token matched and storing the new position in
    # <tt>base_path[:new_pos]</tt>.
    #
    # @param base_path [Hash] <tt>{ new_pos: Integer, components: Array }</tt>
    # @param new_string [Array] tokens of the new input
    # @param old_string [Array] tokens of the old input
    # @param diagonal_path [Integer] new position minus old position
    # @return [Integer] index of the last consumed token in +old_string+
    def extract_common(base_path, new_string, old_string, diagonal_path)
      new_len = new_string.size
      old_len = old_string.size
      new_pos = base_path[:new_pos]
      old_pos = new_pos - diagonal_path
      common_count = 0

      while new_pos + 1 < new_len && old_pos + 1 < old_len &&
            equals(new_string[new_pos + 1], old_string[old_pos + 1])
        new_pos += 1
        old_pos += 1
        common_count += 1
      end

      base_path[:components].push(count: common_count) if common_count.positive?
      base_path[:new_pos] = new_pos
      old_pos
    end

    # Token comparison used by the search.
    #
    # @return [Boolean]
    def equals(l, r)
      l == r
      # TODO: support custom comparator
      # TODO: support case-insensitive
    end

    # Drops +nil+ and empty tokens from a token list.
    #
    # @param array [Array]
    # @return [Array] a new array without +nil+ / empty entries
    def remove_empty(array)
      array.reject { |token| token.nil? || (token.respond_to?(:empty?) && token.empty?) }
    end

    # Normalises raw input before tokenizing; +nil+ becomes the empty string,
    # anything else is passed through untouched.
    def cast_input(str)
      str.nil? ? '' : str
    end

    # Splits the input into single-character tokens.
    #
    # @param str [String]
    # @return [Array<String>]
    def tokenize(str)
      str.chars
    end

    # Inverse of +tokenize+: glues tokens back into one string.
    #
    # @param chars [Array<String>]
    # @return [String]
    def join(chars)
      chars.join('')
    end

    # Fills in +:value+ for every run of a finished path and reorders
    # adjacent add/remove pairs so the removal comes first.
    #
    # @param components [Array<Hash>] runs of the winning path (mutated)
    # @param new_string [Array] tokens of the new input
    # @param old_string [Array] tokens of the old input
    # @param use_longest_token [Boolean] for unchanged runs, take whichever of
    #   the old/new token is longer instead of always the new one
    # @return [Array<Hash>] +components+, each with a +:value+
    def build_values(components, new_string, old_string, use_longest_token = true)
      new_pos = 0
      old_pos = 0

      components.each_index do |index|
        component = components[index]
        count = component[:count]

        if component[:removed]
          component[:value] = join(old_string[old_pos, count])
          old_pos += count

          # The search records an insertion before the matching deletion;
          # swap them so the deletion is emitted first.
          if index.positive? && components[index - 1][:added]
            components[index - 1], components[index] = components[index], components[index - 1]
          end
        else
          tokens = new_string[new_pos, count]
          if !component[:added] && use_longest_token
            tokens = tokens.each_with_index.map do |token, offset|
              old_token = old_string[old_pos + offset]
              old_token.size > token.size ? old_token : token
            end
          end

          component[:value] = join(tokens)
          new_pos += count
          old_pos += count unless component[:added]
        end
      end

      # A trailing change whose value compares equal to '' is folded into the
      # run before it rather than reported as a change.
      last = components.last
      if components.size > 1 &&
         last[:value].is_a?(String) &&
         (last[:added] || last[:removed]) &&
         equals('', last[:value])
        components[-2][:value] += last[:value]
        components.pop
      end

      components
    end

    # Copies a path so it can be extended independently. Only the components
    # array is duplicated; the run hashes inside it stay shared.
    #
    # @param path_hash [Hash] <tt>{ new_pos:, components: }</tt>
    # @return [Hash]
    def clone_path(path_hash)
      { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
    end

    private

    # True once both inputs have been consumed up to their last token.
    def end_reached?(new_pos, old_pos, new_len, old_len)
      new_pos + 1 >= new_len && old_pos + 1 >= old_len
    end

    # Chooses which neighbouring path to extend onto +diagonal+ and records
    # the single add or remove step that gets it there.
    #
    # @return [Hash, nil] the extended path, or +nil+ when neither neighbour
    #   can legally step onto this diagonal
    def branch_onto(best_path, diagonal, new_len, old_len)
      # The lower neighbour is taken out of the table here, which is why it
      # can be extended in place below without cloning.
      add_path = best_path.delete(diagonal - 1)
      remove_path = best_path[diagonal + 1]
      old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal

      can_add = add_path && add_path[:new_pos] + 1 < new_len
      can_remove = remove_path && old_pos >= 0 && old_pos < old_len
      return nil unless can_add || can_remove

      # Prefer the neighbour that is further along in the new input.
      if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
        path = clone_path(remove_path)
        push_component(path[:components], nil, true)
        path
      else
        add_path[:new_pos] += 1
        push_component(add_path[:components], true, nil)
        add_path
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module MyersDiff
  # Release number of the myers_diff gem, as a frozen string.
  VERSION = '1.0.0'.freeze
end
data/lib/myers_diff.rb ADDED
@@ -0,0 +1 @@
1
+ require 'myers_diff/char_diff'
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: myers_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Alex Tsui
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-05-22 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Implementation of Myers 1986 text diff algorithm that started as a port
14
+ of the jsdiff project with plans to branch out to human-friendly diffs
15
+ email: alextsui@pm.me
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/myers_diff.rb
21
+ - lib/myers_diff/char_diff.rb
22
+ - lib/myers_diff/version.rb
23
+ homepage: https://github.com/alextsui05/myers_diff
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubygems_version: 3.0.8
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Implementation of Myers 1986 text diff algorithm
46
+ test_files: []