text_alignment 0.11.5 → 0.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/align_annotations +32 -9
- data/lib/text_alignment/char_mapping.rb +1 -1
- data/lib/text_alignment/mixed_alignment.rb +15 -4
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ed6071b0293e9b7fa86acf4a737c80c9d784f453d3fb77992e3d9f1acc02bbe
|
4
|
+
data.tar.gz: 7a57b7afbe21061d9aac2f96cd9f7a3c83ff2f01b6c3973905f7681a40463287
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141158c2f6b80975bacf68babc713b06d809f02b96cfd7accdecb00c6b4c362b9d130e29820074be7b89ac53c7cc93f850ec3d7ea9796eaad5021d441528e82b
|
7
|
+
data.tar.gz: 8bc4cc9ca9becc1c5638b6501268d33dd7700e8369d7c20c1ec6fd9ef11e9fd73e6a1354cb37376af250c8462e945cccf08417ba6d258f08660c53eb418e4658
|
data/bin/align_annotations
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
require 'text_alignment'
|
3
3
|
require 'json'
|
4
4
|
require 'pp'
|
5
|
+
require 'optparse'
|
5
6
|
|
6
7
|
def read_annotations(filename)
|
7
8
|
case File.extname(filename)
|
@@ -108,24 +109,46 @@ def align_mannotations(source_annotations, reference_text, alignment, debug = fa
|
|
108
109
|
end
|
109
110
|
|
110
111
|
|
112
|
+
## Options
|
113
|
+
overlap_p = false
|
114
|
+
debug_p = false
|
115
|
+
|
116
|
+
## command line option processing
|
117
|
+
require 'optparse'
|
118
|
+
optparse = OptionParser.new do |opts|
|
119
|
+
opts.banner = "Usage: align_annotations [options] target_annotations(.json|.txt) reference_text(.json|.txt)"
|
120
|
+
|
121
|
+
opts.on('-o', '--overlap', 'tells it to assume there may be overlapping texts.') do
|
122
|
+
overlap_p = true
|
123
|
+
end
|
124
|
+
|
125
|
+
opts.on('-d', '--debug', 'tells it to show debugging information.') do
|
126
|
+
debug_p = true
|
127
|
+
end
|
128
|
+
|
129
|
+
opts.on('-h', '--help', 'displays this screen.') do
|
130
|
+
puts opts
|
131
|
+
exit
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
optparse.parse!
|
136
|
+
|
111
137
|
unless ARGV.length == 2
|
112
|
-
|
113
|
-
exit
|
138
|
+
puts optparse.help
|
139
|
+
exit 1
|
114
140
|
end
|
115
141
|
|
116
142
|
source_annotations = read_annotations(ARGV[0])
|
117
143
|
reference_text = read_text(ARGV[1])
|
118
144
|
|
119
|
-
alignment = TextAlignment::TextAlignment.new(reference_text,
|
145
|
+
alignment = TextAlignment::TextAlignment.new(reference_text, !overlap_p)
|
120
146
|
|
121
147
|
target_annotations = if source_annotations.class == Array
|
122
|
-
|
123
|
-
align_mannotations(source_annotations, reference_text, alignment, false)
|
148
|
+
align_mannotations(source_annotations, reference_text, alignment, debug_p)
|
124
149
|
else
|
125
|
-
|
126
|
-
denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, false)
|
150
|
+
denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, debug_p)
|
127
151
|
source_annotations.merge({text:reference_text, denotations:denotations})
|
128
152
|
end
|
129
153
|
|
130
|
-
|
131
|
-
# puts target_annotations.to_json
|
154
|
+
puts target_annotations.to_json
|
data/lib/text_alignment/char_mapping.rb
CHANGED
@@ -108,7 +108,7 @@ class TextAlignment::CharMapping
|
|
108
108
|
def enmap_text(_text, char_mapping)
|
109
109
|
text = _text.dup
|
110
110
|
|
111
|
-
# To execute the single letter mapping
|
111
|
+
# To execute the single letter mapping replacement
|
112
112
|
char_mapping.each do |one, long|
|
113
113
|
text.gsub!(one, long) if long.length == 1
|
114
114
|
end
|
data/lib/text_alignment/mixed_alignment.rb
CHANGED
@@ -147,13 +147,24 @@ class TextAlignment::MixedAlignment
|
|
147
147
|
# recoverbility
|
148
148
|
count_nws = sdiff.count{|d| d.old_element =~ /\S/}
|
149
149
|
count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/}
|
150
|
-
|
151
150
|
coverage = count_nws_match.to_f / count_nws
|
152
151
|
|
153
152
|
# fragmentation rate
|
154
|
-
|
155
|
-
|
156
|
-
|
153
|
+
frag_str = sdiff.collect do |d|
|
154
|
+
case d.action
|
155
|
+
when '='
|
156
|
+
'='
|
157
|
+
when '-'
|
158
|
+
''
|
159
|
+
when '+'
|
160
|
+
(d.new_element =~ /\S/) ? '+' : ''
|
161
|
+
else
|
162
|
+
''
|
163
|
+
end
|
164
|
+
end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '')
|
165
|
+
|
166
|
+
count_frag = frag_str.scan(/=+/).count
|
167
|
+
rate_frag = 1.0 / count_frag
|
157
168
|
|
158
169
|
similarity = coverage * rate_frag
|
159
170
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.5
|
4
|
+
version: 0.11.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|