plain-merge 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fd3d000aba244c2628a3216cb39596c0019700c11ae89568efef00a6aa3aa796
4
+ data.tar.gz: 3e2c8c645cc153e6e48194d8ce9860b0adf8c4ce1d5582ab6b9f14d8a85fe2f7
5
+ SHA512:
6
+ metadata.gz: f4ac184c99e041ed6b0f0551022fc9e0163b681db893344a07b1f30503220250ffe9fd19092b5b60fe05a5ed78c627fc736ec0c3d7a9884259977a60f03e7278
7
+ data.tar.gz: eaf6d3f57d5cf225ef29e5aa9e0f5021df15d6b74d6aaa2b2d62bf9fa495dfe91fbd784b8775fbe443c38756749ea2c1282b6f784eaf69f2158cb3908f7a54ca
checksums.yaml.gz.sig ADDED
@@ -0,0 +1,2 @@
1
+ \s�(� ��V0;����%s���Z@O ���n��v&-���$K�(dz��d
2
+ Z�z:�:������p?�Hg��v��|����)#�B��K�c:�A�ܨ�3kk!��A+1K�'��~�G�q�j�#jlo }w��F���bA0w�D|���C#O{� h�Sx�mM���/�ZI���3����ڌ�VM�n.̽�ٛ4��G`)������������I[��Z iE� d��b
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Plain
4
+ module Merge
5
+ module Version # Namespace that holds the release number of the plain-merge gem.
6
+ VERSION = "7.0.0" # Version string for this release.
7
+ end
8
+
9
+ VERSION = Version::VERSION # Re-exported so the value is also readable as Plain::Merge::VERSION.
10
+ end
11
+ end
@@ -0,0 +1,262 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tree_haver"
4
+
5
module Plain
  # Plain-text normalization, similarity scoring, block matching and merging.
  #
  # Text is treated as a sequence of "blocks" (paragraphs separated by blank
  # lines). All public helpers are module functions, e.g.
  # +Plain::Merge.merge_text(template, destination)+.
  module Merge
    # Name of the gem that ships this module.
    PACKAGE_NAME = "plain-merge"

    # Minimum refined score a non-identical pair of blocks must reach to be
    # paired during the "refined" matching phase.
    DEFAULT_TEXT_REFINEMENT_THRESHOLD = 0.7

    # Relative contribution of each signal to the refined similarity score.
    DEFAULT_TEXT_REFINEMENT_WEIGHTS = {
      content: 0.7,
      length: 0.15,
      position: 0.15
    }.freeze

    module_function

    # Describes the feature profile of the plain-text family.
    #
    # @return [Hash] +:family+ ("text") plus empty +:supported_dialects+ and
    #   +:supported_policies+ lists
    def text_feature_profile
      {
        family: "text",
        supported_dialects: [],
        supported_policies: []
      }
    end

    # Builds a TreeHaver parser request for +source+ with language "text".
    #
    # @param source [String] raw text
    # @return [TreeHaver::ParserRequest]
    def text_parse_request(source)
      TreeHaver::ParserRequest.new(source: source, language: "text")
    end

    # Canonicalizes text: CRLF/CR become LF, the text is stripped and split
    # into blocks on blank (whitespace-only) lines, whitespace runs inside a
    # block collapse to one space, empty blocks are dropped, and the blocks
    # are re-joined with exactly one blank line.
    #
    # @param source [String] raw text
    # @return [String] normalized text ("" when there is no content)
    def normalize_text(source)
      source
        .gsub(/\r\n?/, "\n")
        .strip
        .split(/\n\s*\n+/)
        .map { |block| block.strip.gsub(/\s+/, " ") }
        .reject(&:empty?)
        .join("\n\n")
    end

    # Normalizes +source+ and describes each of its blocks.
    #
    # @param source [String] raw text
    # @return [Hash] +:kind+ ("text"), +:normalized_source+, and +:blocks+;
    #   every block carries +:index+, +:normalized+ text and a +:span+ whose
    #   +:start+/+:end+ are character offsets into +:normalized_source+
    #   (+:end+ is exclusive)
    def analyze_text(source)
      normalized_source = normalize_text(source)
      parts = normalized_source.empty? ? [] : normalized_source.split("\n\n")
      cursor = 0

      blocks = parts.each_with_index.map do |normalized, index|
        start_offset = cursor
        end_offset = start_offset + normalized.length
        # Skip the "\n\n" separator that normalize_text puts between blocks.
        cursor = end_offset + 2

        {
          index: index,
          normalized: normalized,
          span: {
            start: start_offset,
            end: end_offset
          }
        }
      end

      {
        kind: "text",
        normalized_source: normalized_source,
        blocks: blocks
      }
    end

    # Scores two texts by comparing blocks at the same position with Jaccard
    # token similarity and averaging over the larger block count, so a block
    # without a positional counterpart contributes 0.
    #
    # @param left_source [String]
    # @param right_source [String]
    # @return [Float] score in 0.0..1.0 (1.0 when both texts have no blocks)
    def similarity_score(left_source, right_source)
      left_blocks = analyze_text(left_source)[:blocks]
      right_blocks = analyze_text(right_source)[:blocks]
      total = [left_blocks.length, right_blocks.length].max
      return 1.0 if total.zero?

      sum = 0.0
      total.times do |index|
        left_block = left_blocks[index]
        right_block = right_blocks[index]
        next unless left_block && right_block

        sum += jaccard(left_block[:normalized], right_block[:normalized])
      end

      sum / total
    end

    # Compares two texts against a similarity threshold.
    #
    # @param left_source [String]
    # @param right_source [String]
    # @param threshold [Numeric] minimum score that counts as a match
    # @return [Hash] +:score+, +:threshold+ and +:matched+ (score >= threshold)
    def is_similar(left_source, right_source, threshold)
      score = similarity_score(left_source, right_source)
      {
        score: score,
        threshold: threshold,
        matched: score >= threshold
      }
    end

    # Pairs destination blocks with template blocks.
    #
    # Identical normalized blocks are paired first (phase "exact", score 1.0).
    # Each remaining destination block is then paired with the still-unmatched
    # template block that has the highest refined similarity, provided it
    # reaches DEFAULT_TEXT_REFINEMENT_THRESHOLD (phase "refined").
    #
    # @param template_source [String]
    # @param destination_source [String]
    # @return [Hash] +:matched+ (hashes with +:template_index+,
    #   +:destination_index+, +:phase+, +:score+), +:unmatched_template+ and
    #   +:unmatched_destination+ (block indexes in ascending order)
    def match_text_blocks(template_source, destination_source)
      match_analyzed_blocks(analyze_text(template_source), analyze_text(destination_source))
    end

    # Merges two texts: every destination block is kept in order, followed by
    # the template blocks that could not be matched to any destination block.
    #
    # @param template_source [String]
    # @param destination_source [String]
    # @return [Hash] +:ok+ (always true), +:diagnostics+ (always empty) and
    #   +:output+ (merged blocks joined by a blank line)
    def merge_text(template_source, destination_source)
      template = analyze_text(template_source)
      destination = analyze_text(destination_source)
      # Reuse the analyses above rather than re-analyzing both sources.
      matches = match_analyzed_blocks(template, destination)

      kept = destination[:blocks].map { |block| block[:normalized] }
      appended = matches[:unmatched_template].map { |index| template[:blocks][index][:normalized] }

      {
        ok: true,
        diagnostics: [],
        output: (kept + appended).join("\n\n")
      }
    end

    # Weighted blend of content, length and relative-position similarity for
    # a template block and a destination block.
    #
    # @param template_block [Hash] block as produced by {analyze_text}
    # @param destination_block [Hash] block as produced by {analyze_text}
    # @param template_total [Integer] number of template blocks
    # @param destination_total [Integer] number of destination blocks
    # @param weights [Hash] +:content+, +:length+ and +:position+ weights
    # @return [Float]
    def refined_text_similarity(template_block, destination_block, template_total, destination_total, weights = DEFAULT_TEXT_REFINEMENT_WEIGHTS)
      content = string_similarity(template_block[:normalized], destination_block[:normalized])
      length = length_similarity(template_block[:normalized], destination_block[:normalized])
      position = position_similarity(
        template_block[:index],
        destination_block[:index],
        template_total,
        destination_total
      )

      (weights[:content] * content) + (weights[:length] * length) + (weights[:position] * position)
    end

    # Matching core shared by {match_text_blocks} and {merge_text}; works on
    # already-analyzed texts so callers never analyze a source twice.
    def match_analyzed_blocks(template, destination)
      template_blocks = template[:blocks]
      destination_blocks = destination[:blocks]
      matched_template = {}
      matched_destination = {}
      matched = []

      # Phase 1: pair blocks whose normalized text is identical, taking the
      # first template block that is still free.
      destination_blocks.each_with_index do |destination_block, destination_index|
        template_index = template_blocks.each_index.find do |candidate_index|
          !matched_template[candidate_index] &&
            template_blocks[candidate_index][:normalized] == destination_block[:normalized]
        end
        next unless template_index

        matched_template[template_index] = true
        matched_destination[destination_index] = true
        matched << {
          template_index: template_index,
          destination_index: destination_index,
          phase: "exact",
          score: 1.0
        }
      end

      # Phase 2: greedily pair each leftover destination block with its best
      # scoring free template block; ties keep the earliest template block.
      destination_blocks.each_with_index do |destination_block, destination_index|
        next if matched_destination[destination_index]

        best_template_index = nil
        best_score = 0.0
        template_blocks.each_with_index do |template_block, template_index|
          next if matched_template[template_index]

          score = refined_text_similarity(
            template_block,
            destination_block,
            template_blocks.length,
            destination_blocks.length
          )
          next unless score >= DEFAULT_TEXT_REFINEMENT_THRESHOLD && score > best_score

          best_score = score
          best_template_index = template_index
        end

        next unless best_template_index

        matched_template[best_template_index] = true
        matched_destination[destination_index] = true
        matched << {
          template_index: best_template_index,
          destination_index: destination_index,
          phase: "refined",
          score: best_score
        }
      end

      {
        matched: matched,
        unmatched_template: template_blocks.each_index.reject { |index| matched_template[index] },
        unmatched_destination: destination_blocks.each_index.reject { |index| matched_destination[index] }
      }
    end
    private_class_method :match_analyzed_blocks

    # Whitespace-separated tokens of +normalized+ as a token => true lookup.
    def token_set(normalized)
      normalized.split(/\s+/).reject(&:empty?).to_h { |token| [token, true] }
    end
    private_class_method :token_set

    # Jaccard index of the token sets of two strings (1.0 when both are empty).
    def jaccard(left, right)
      left_tokens = token_set(left)
      right_tokens = token_set(right)
      return 1.0 if left_tokens.empty? && right_tokens.empty?

      intersection = left_tokens.count { |token, _present| right_tokens.key?(token) }
      # |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero because one set has a token.
      union = left_tokens.size + right_tokens.size - intersection
      intersection.to_f / union
    end
    private_class_method :jaccard

    # Character-level Levenshtein edit distance using two rolling rows.
    def levenshtein_distance(left, right)
      return 0 if left == right
      return right.length if left.empty?
      return left.length if right.empty?

      # Character arrays give constant-time access; String#[] may have to
      # scan from the start of a multibyte string on every lookup.
      left_chars = left.chars
      right_chars = right.chars
      previous = (0..left_chars.length).to_a
      current = Array.new(left_chars.length + 1, 0)

      right_chars.each_with_index do |right_char, row|
        current[0] = row + 1

        left_chars.each_with_index do |left_char, column|
          cost = left_char == right_char ? 0 : 1
          current[column + 1] = [
            current[column] + 1,
            previous[column + 1] + 1,
            previous[column] + cost
          ].min
        end

        # Swap rows instead of copying; the stale row is fully overwritten
        # on the next pass.
        previous, current = current, previous
      end

      previous[left_chars.length]
    end
    private_class_method :levenshtein_distance

    # Edit-distance similarity in 0.0..1.0: 1.0 for equal strings, 0.0 when
    # exactly one side is empty.
    def string_similarity(left, right)
      return 1.0 if left == right
      return 0.0 if left.empty? || right.empty?

      distance = levenshtein_distance(left, right)
      1.0 - (distance.to_f / [left.length, right.length].max)
    end
    private_class_method :string_similarity

    # Ratio of the shorter length to the longer one (1.0 for equal lengths).
    def length_similarity(left, right)
      return 1.0 if left.length == right.length

      max_length = [left.length, right.length].max
      return 1.0 if max_length.zero?

      [left.length, right.length].min.to_f / max_length
    end
    private_class_method :length_similarity

    # Maps a block index to 0.0..1.0 within its document; a document with at
    # most one block maps to the midpoint 0.5.
    def relative_position(index, total)
      total > 1 ? index.to_f / (total - 1) : 0.5
    end
    private_class_method :relative_position

    # 1.0 minus the absolute difference of the two relative positions.
    def position_similarity(template_index, destination_index, template_total, destination_total)
      1.0 - (
        relative_position(template_index, template_total) -
        relative_position(destination_index, destination_total)
      ).abs
    end
    private_class_method :position_similarity
  end
end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "plain/merge"
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: plain-merge
3
+ version: !ruby/object:Gem::Version
4
+ version: 7.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Peter H. Boling
8
+ bindir: bin
9
+ cert_chain:
10
+ - |
11
+ -----BEGIN CERTIFICATE-----
12
+ MIIEgDCCAuigAwIBAgIBATANBgkqhkiG9w0BAQsFADBDMRUwEwYDVQQDDAxwZXRl
13
+ ci5ib2xpbmcxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkW
14
+ A2NvbTAeFw0yNTA1MDQxNTMzMDlaFw00NTA0MjkxNTMzMDlaMEMxFTATBgNVBAMM
15
+ DHBldGVyLmJvbGluZzEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPy
16
+ LGQBGRYDY29tMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAruUoo0WA
17
+ uoNuq6puKWYeRYiZekz/nsDeK5x/0IEirzcCEvaHr3Bmz7rjo1I6On3gGKmiZs61
18
+ LRmQ3oxy77ydmkGTXBjruJB+pQEn7UfLSgQ0xa1/X3kdBZt6RmabFlBxnHkoaGY5
19
+ mZuZ5+Z7walmv6sFD9ajhzj+oIgwWfnEHkXYTR8I6VLN7MRRKGMPoZ/yvOmxb2DN
20
+ coEEHWKO9CvgYpW7asIihl/9GMpKiRkcYPm9dGQzZc6uTwom1COfW0+ZOFrDVBuV
21
+ FMQRPswZcY4Wlq0uEBLPU7hxnCL9nKK6Y9IhdDcz1mY6HZ91WImNslOSI0S8hRpj
22
+ yGOWxQIhBT3fqCBlRIqFQBudrnD9jSNpSGsFvbEijd5ns7Z9ZMehXkXDycpGAUj1
23
+ to/5cuTWWw1JqUWrKJYoifnVhtE1o1DZ+LkPtWxHtz5kjDG/zR3MG0Ula0UOavlD
24
+ qbnbcXPBnwXtTFeZ3C+yrWpE4pGnl3yGkZj9SMTlo9qnTMiPmuWKQDatAgMBAAGj
25
+ fzB9MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQWBBQE8uWvNbPVNRXZ
26
+ HlgPbc2PCzC4bjAhBgNVHREEGjAYgRZwZXRlci5ib2xpbmdAZ21haWwuY29tMCEG
27
+ A1UdEgQaMBiBFnBldGVyLmJvbGluZ0BnbWFpbC5jb20wDQYJKoZIhvcNAQELBQAD
28
+ ggGBAJbnUwfJQFPkBgH9cL7hoBfRtmWiCvdqdjeTmi04u8zVNCUox0A4gT982DE9
29
+ wmuN12LpdajxZONqbXuzZvc+nb0StFwmFYZG6iDwaf4BPywm2e/Vmq0YG45vZXGR
30
+ L8yMDSK1cQXjmA+ZBKOHKWavxP6Vp7lWvjAhz8RFwqF9GuNIdhv9NpnCAWcMZtpm
31
+ GUPyIWw/Cw/2wZp74QzZj6Npx+LdXoLTF1HMSJXZ7/pkxLCsB8m4EFVdb/IrW/0k
32
+ kNSfjtAfBHO8nLGuqQZVH9IBD1i9K6aSs7pT6TW8itXUIlkIUI2tg5YzW6OFfPzq
33
+ QekSkX3lZfY+HTSp/o+YvKkqWLUV7PQ7xh1ZYDtocpaHwgxe/j3bBqHE+CUPH2vA
34
+ 0V/FwdTRWcwsjVoOJTrYcff8pBZ8r2MvtAc54xfnnhGFzeRHfcltobgFxkAXdE6p
35
+ DVjBtqT23eugOqQ73umLcYDZkc36vnqGxUBSsXrzY9pzV5gGr2I8YUxMqf6ATrZt
36
+ L9nRqA==
37
+ -----END CERTIFICATE-----
38
+ date: 1980-01-02 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: ast-merge
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '='
45
+ - !ruby/object:Gem::Version
46
+ version: 7.0.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '='
52
+ - !ruby/object:Gem::Version
53
+ version: 7.0.0
54
+ - !ruby/object:Gem::Dependency
55
+ name: tree_haver
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '='
59
+ - !ruby/object:Gem::Version
60
+ version: 7.0.0
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - '='
66
+ - !ruby/object:Gem::Version
67
+ version: 7.0.0
68
+ description: Portable text normalization, similarity, matching, and merge behavior
69
+ for Structured Merge.
70
+ email:
71
+ - info@structuredmerge.org
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/plain-merge.rb
77
+ - lib/plain/merge.rb
78
+ - lib/plain/merge/version.rb
79
+ homepage: https://github.com/structuredmerge/structuredmerge-ruby
80
+ licenses:
81
+ - AGPL-3.0-only
82
+ - PolyForm-Small-Business-1.0.0
83
+ metadata:
84
+ homepage_uri: https://structuredmerge.org
85
+ source_code_uri: https://github.com/structuredmerge/structuredmerge-ruby/tree/v7.0.0
86
+ changelog_uri: https://github.com/structuredmerge/structuredmerge-ruby/blob/v7.0.0/CHANGELOG.md
87
+ bug_tracker_uri: https://github.com/structuredmerge/structuredmerge-ruby/issues
88
+ documentation_uri: https://www.rubydoc.info/gems/plain-merge/7.0.0
89
+ funding_uri: https://github.com/sponsors/pboling
90
+ wiki_uri: https://github.com/structuredmerge/structuredmerge-ruby/wiki
91
+ discord_uri: https://discord.gg/3qme4XHNKN
92
+ rubygems_mfa_required: 'true'
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 4.0.0
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubygems_version: 4.0.10
108
+ specification_version: 4
109
+ summary: Structured Merge plain text analysis and merge for Ruby
110
+ test_files: []
metadata.gz.sig ADDED
Binary file