myers_diff 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30beabf5813fa2a01a7b2d599b5041afb61c8ec6a1d91c0298cfb9bde2c9a9eb
4
- data.tar.gz: a5eaeef03046a444bfa566629b9cf38f4f4f97604423dc7cbb509ad35c27f616
3
+ metadata.gz: 38ecad0ebd0a04de796e852e6cfc77065daf0b0da1e6384beed39520b4279230
4
+ data.tar.gz: 285e2e66e2e25facf31c54b2c32d7d45d8bd2f695d7f789f04b84c579ef2abc4
5
5
  SHA512:
6
- metadata.gz: 534e00e7e71ffa1a878c4f885af92fed85758ad927dde76f8ac1e89b4e4a2b5534bab2cff073df6ec23ced8f16a6ebe777a6e3efe8664e3e282c151aeefabb0b
7
- data.tar.gz: 5808a27d91717e5b8fb280f35d23d92afbd1d6be8ac854feb308ac0e33a818119c2bd60e8cc540bd475cf02c80f317a848f0b14e1bdbaa682fde73eeeeeae212
6
+ metadata.gz: 1fa47b2e26e99fc72dbda6f16155848de64da9b59da6aa68cc8ecdfca127395b0887c8e4ecd3ecaa2a3acab2e937c7175aa50f35c4a6b8d39bef0c906286b2e3
7
+ data.tar.gz: eeb8d1d8db1bebda97e164932b4e878374945d1940f8515d2f0b308bd922361fff2e1ae035f5044fb43ea405a283109a407c4a9fc32c145b200526ff3afbe213
@@ -1 +1,2 @@
1
1
  require 'myers_diff/char_diff'
2
+ require 'myers_diff/word_diff'
@@ -1,3 +1,3 @@
1
1
  module MyersDiff
2
- VERSION = "1.0.0".freeze
2
+ VERSION = "1.1.0".freeze
3
3
  end
@@ -0,0 +1,187 @@
1
module MyersDiff
  # Word-level text diff built on the Myers (1986) shortest-edit-script search,
  # following the structure of the jsdiff project this gem was ported from.
  #
  # Inputs are split on whitespace into word tokens; the result is an ordered
  # list of component hashes, each describing a run of words that is common to
  # both inputs, removed from the old input, or added in the new input.
  #
  # Component hash keys:
  #   :value   - the words of the run, joined with single spaces
  #   :count   - see NOTE(review) below
  #   :added   - true for runs that only exist in the new input
  #   :removed - true for runs that only exist in the old input
  # Common runs carry neither :added nor :removed.
  #
  # NOTE(review): :count is not uniform across return paths. When both inputs
  # tokenize identically, #diff returns the number of word tokens. In every
  # other case #build_values finishes with #recount, which overwrites :count
  # with the character length of :value. Behavior is preserved here; confirm
  # the intended meaning with the author before relying on :count.
  class WordDiff
    # Computes the word diff from +s1+ (old text) to +s2+ (new text).
    #
    # @param s1 [String, nil] old text; nil is treated as an empty string
    # @param s2 [String, nil] new text; nil is treated as an empty string
    # @param options [Hash] accepted for interface compatibility; currently ignored
    # @return [Array<Hash>] components in output order, with a removal placed
    #   before the insertion it is paired with
    # @raise [RuntimeError] if no edit path is found within old_len + new_len
    #   edits (not expected to happen; guards against returning a bogus value)
    def diff(s1, s2, **options)
      old_tokens = remove_empty(tokenize(cast_input(s1)))
      new_tokens = remove_empty(tokenize(cast_input(s2)))

      new_len = new_tokens.size
      old_len = old_tokens.size
      max_edit_length = new_len + old_len

      # best_path maps a diagonal number to the furthest-reaching path known on
      # that diagonal: { new_pos: Integer, components: Array<Hash> }.
      best_path = { 0 => { new_pos: -1, components: [] } }

      # Consume the common prefix; if that already covers both inputs they are
      # identical and there is nothing to search for.
      old_pos = extract_common(best_path[0], new_tokens, old_tokens, 0)
      if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        return [{ value: join(new_tokens), count: new_tokens.size }]
      end

      (1..max_edit_length).each do |edit_length|
        # Only every second diagonal is reachable with a given edit length.
        (-edit_length).step(edit_length, 2) do |diagonal|
          add_path = best_path[diagonal - 1]
          remove_path = best_path[diagonal + 1]
          old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal
          # The add path is extended in place below, so drop it from the table.
          best_path[diagonal - 1] = nil if add_path

          can_add = add_path && add_path[:new_pos] + 1 < new_len
          can_remove = remove_path && old_pos >= 0 && old_pos < old_len
          unless can_add || can_remove
            # Neither neighbour can be extended onto this diagonal.
            best_path[diagonal] = nil
            next
          end

          # Extend whichever neighbour reaches further in the new input. The
          # remove path is cloned because the next diagonal may still use it
          # as its add path.
          if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
            base_path = clone_path(remove_path)
            push_component(base_path[:components], nil, true)
          else
            base_path = add_path
            base_path[:new_pos] += 1
            push_component(base_path[:components], true, nil)
          end

          old_pos = extract_common(base_path, new_tokens, old_tokens, diagonal)

          # Reaching the end of both inputs means the edit script is complete.
          if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
            return build_values(base_path[:components], new_tokens, old_tokens)
          end

          best_path[diagonal] = base_path
        end
      end

      # The search is bounded by old_len + new_len edits (remove everything,
      # then add everything), so falling through indicates an internal error.
      raise 'MyersDiff::WordDiff#diff: no edit path found within the maximum edit length'
    end

    # Records one more added or removed token on +components+.
    #
    # If the last component has the same added/removed flags it is replaced by
    # a new hash with an incremented count; the existing hash is not mutated
    # because #clone_path copies only the array, so hashes can be shared
    # between paths.
    #
    # @param components [Array<Hash>] component list of the path being extended
    # @param added [true, nil] flag stored under :added
    # @param removed [true, nil] flag stored under :removed
    def push_component(components, added, removed)
      last = components.last
      if last && last[:added] == added && last[:removed] == removed
        components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
      else
        components.push({ count: 1, added: added, removed: removed })
      end
    end

    # Advances +base_path+ along +diagonal_path+ while the next tokens of both
    # inputs are equal, appending a common component if any tokens matched.
    #
    # @param base_path [Hash] { new_pos: Integer, components: Array }; its
    #   :new_pos and :components are updated in place
    # @param new_string [Array<String>] tokens of the new input
    # @param old_string [Array<String>] tokens of the old input
    # @param diagonal_path [Integer] diagonal number (new_pos - old_pos)
    # @return [Integer] index of the last consumed token of the old input
    def extract_common(base_path, new_string, old_string, diagonal_path)
      new_len = new_string.size
      old_len = old_string.size
      new_pos = base_path[:new_pos]
      old_pos = new_pos - diagonal_path
      common_count = 0

      while new_pos + 1 < new_len && old_pos + 1 < old_len &&
            equals(new_string[new_pos + 1], old_string[old_pos + 1])
        new_pos += 1
        old_pos += 1
        common_count += 1
      end

      base_path[:components].push({ count: common_count }) if common_count > 0

      base_path[:new_pos] = new_pos
      old_pos
    end

    # Token comparison used by the search.
    #
    # TODO: support custom comparator
    # TODO: support case-insensitive
    #
    # @return [Boolean] whether the two tokens are ==
    def equals(l, r)
      l == r
    end

    # Drops nil entries from a token array.
    def remove_empty(array)
      array.compact
    end

    # Normalizes a raw input before tokenizing. nil becomes an empty string so
    # that a missing text diffs like an empty one instead of raising in
    # #tokenize; any other value is passed through unchanged.
    def cast_input(str)
      str.nil? ? '' : str
    end

    # Splits text into word tokens. String#split(' ') splits on runs of
    # whitespace and ignores leading whitespace, so the original spacing is
    # not preserved in the tokens.
    def tokenize(str)
      str.split(' ')
    end

    # Joins tokens back into a single space-separated string.
    def join(chars)
      chars.join(' ')
    end

    # Fills in :value for every component of a finished path, reorders each
    # added/removed pair so the removal comes first, and then recounts.
    #
    # @param components [Array<Hash>] components of the winning path; the array
    #   and its hashes are modified in place
    # @param new_string [Array<String>] tokens of the new input
    # @param old_string [Array<String>] tokens of the old input
    # @param use_longest_token [Boolean] for common runs, take the longer of
    #   the old and new token at each position
    # @return [Array<Hash>] the same +components+ array
    def build_values(components, new_string, old_string, use_longest_token = true)
      component_len = components.size
      new_pos = 0
      old_pos = 0

      # Indexed loop on purpose: the swap below rewrites the current and
      # previous slots while walking the array.
      component_len.times do |position|
        component = components[position]
        span = component[:count]

        if component[:removed]
          component[:value] = join(old_string[old_pos, span])
          old_pos += span

          # Put the removal before the insertion it follows.
          if position > 0 && components[position - 1][:added]
            components[position - 1], components[position] =
              components[position], components[position - 1]
          end
        else
          tokens = new_string[new_pos, span]
          if !component[:added] && use_longest_token
            tokens = tokens.each_with_index.map do |token, offset|
              old_token = old_string[old_pos + offset]
              old_token.size > token.size ? old_token : token
            end
          end
          component[:value] = join(tokens)

          new_pos += span
          old_pos += span unless component[:added]
        end
      end

      # Fold a trailing added/removed component whose value is the empty
      # string into the component before it.
      last_component = components[component_len - 1]
      if component_len > 1 &&
         last_component[:value].is_a?(String) &&
         (last_component[:added] || last_component[:removed]) &&
         equals('', last_component[:value])
        components[component_len - 2][:value] += last_component[:value]
        components.pop
      end

      recount(components)

      components
    end

    # Copies a path so it can be extended independently. Only the components
    # array is duplicated; the component hashes themselves stay shared.
    def clone_path(path_hash)
      { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
    end

    # Overwrites each component's :count with the size of its :value. Since
    # :value is a joined String here, that is a character count, not a word
    # count.
    def recount(components)
      components.each { |component| component[:count] = component[:value].size }
    end
  end
end
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: myers_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Tsui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-22 00:00:00.000000000 Z
11
+ date: 2020-05-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: Implementation of Myers 1986 text diff algorithmthat started as a port
14
- of the jsdiff project withplans to branch out to human-friendly diffs
13
+ description: Implementation of Myers 1986 text diff algorithm that started as a port
14
+ of the jsdiff project with plans to branch out to human-friendly diffs
15
15
  email: alextsui@pm.me
16
16
  executables: []
17
17
  extensions: []
@@ -20,6 +20,7 @@ files:
20
20
  - lib/myers_diff.rb
21
21
  - lib/myers_diff/char_diff.rb
22
22
  - lib/myers_diff/version.rb
23
+ - lib/myers_diff/word_diff.rb
23
24
  homepage: https://github.com/alextsui05/myers_diff
24
25
  licenses:
25
26
  - MIT