text_alignment 0.11.5 → 0.11.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: abf4387fb9ea598d924eb0a7369ca58ec0be7cdd35ac89abc57647d665c00207
4
- data.tar.gz: 30ab0ccea5c04cae8132a70872cf5de4704ce7fd4e2104f7fddc6527794424c5
3
+ metadata.gz: 5ed6071b0293e9b7fa86acf4a737c80c9d784f453d3fb77992e3d9f1acc02bbe
4
+ data.tar.gz: 7a57b7afbe21061d9aac2f96cd9f7a3c83ff2f01b6c3973905f7681a40463287
5
5
  SHA512:
6
- metadata.gz: 2033123f2289c991b2a53826921ca19489f7f39c53918df5f962e321b1f3ff0996760e9df302872d07a7e638e6c56ad86428d1ce773d6e14c54180ad2360b1be
7
- data.tar.gz: a902ca48bba502af787deb3abf6c5a2b4c9a121394452ef25e95c61212ee0cfa4e991f83315bc6e03a52d45ead909acedeaf0abd9cdc278ccc58b7cb315ab556
6
+ metadata.gz: 141158c2f6b80975bacf68babc713b06d809f02b96cfd7accdecb00c6b4c362b9d130e29820074be7b89ac53c7cc93f850ec3d7ea9796eaad5021d441528e82b
7
+ data.tar.gz: 8bc4cc9ca9becc1c5638b6501268d33dd7700e8369d7c20c1ec6fd9ef11e9fd73e6a1354cb37376af250c8462e945cccf08417ba6d258f08660c53eb418e4658
@@ -2,6 +2,7 @@
2
2
  require 'text_alignment'
3
3
  require 'json'
4
4
  require 'pp'
5
+ require 'optparse'
5
6
 
6
7
  def read_annotations(filename)
7
8
  case File.extname(filename)
@@ -108,24 +109,46 @@ def align_mannotations(source_annotations, reference_text, alignment, debug = fa
108
109
  end
109
110
 
110
111
 
112
+ ## Options
113
+ overlap_p = false
114
+ debug_p = false
115
+
116
+ ## command line option processing
117
+ require 'optparse'
118
+ optparse = OptionParser.new do |opts|
119
+ opts.banner = "Usage: align_annotations [options] target_annotations(.json|.txt) reference_text(.json|.txt)"
120
+
121
+ opts.on('-o', '--overlap', 'tells it to assume there may be overlapping texts.') do
122
+ overlap_p = true
123
+ end
124
+
125
+ opts.on('-d', '--debug', 'tells it to show debugging information.') do
126
+ debug_p = true
127
+ end
128
+
129
+ opts.on('-h', '--help', 'displays this screen.') do
130
+ puts opts
131
+ exit
132
+ end
133
+ end
134
+
135
+ optparse.parse!
136
+
111
137
  unless ARGV.length == 2
112
- warn "align_annotations target_annotations(.json|.txt) reference_annotations(.json|.txt)"
113
- exit
138
+ puts optparse.help
139
+ exit 1
114
140
  end
115
141
 
116
142
  source_annotations = read_annotations(ARGV[0])
117
143
  reference_text = read_text(ARGV[1])
118
144
 
119
- alignment = TextAlignment::TextAlignment.new(reference_text, true)
145
+ alignment = TextAlignment::TextAlignment.new(reference_text, !overlap_p)
120
146
 
121
147
  target_annotations = if source_annotations.class == Array
122
- # align_mannotations(source_annotations, reference_text, alignment, true)
123
- align_mannotations(source_annotations, reference_text, alignment, false)
148
+ align_mannotations(source_annotations, reference_text, alignment, debug_p)
124
149
  else
125
- # denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment)
126
- denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, false)
150
+ denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, debug_p)
127
151
  source_annotations.merge({text:reference_text, denotations:denotations})
128
152
  end
129
153
 
130
- # pp alignment.block_alignment
131
- # puts target_annotations.to_json
154
+ puts target_annotations.to_json
@@ -108,7 +108,7 @@ class TextAlignment::CharMapping
108
108
  def enmap_text(_text, char_mapping)
109
109
  text = _text.dup
110
110
 
111
- # To execute the single letter mapping
111
+ # To execute the single letter mapping replacement
112
112
  char_mapping.each do |one, long|
113
113
  text.gsub!(one, long) if long.length == 1
114
114
  end
@@ -147,13 +147,24 @@ class TextAlignment::MixedAlignment
147
147
  # recoverability
148
148
  count_nws = sdiff.count{|d| d.old_element =~ /\S/}
149
149
  count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/}
150
-
151
150
  coverage = count_nws_match.to_f / count_nws
152
151
 
153
152
  # fragmentation rate
154
- count_ofrag = sdiff.count{|d| d.old_element =~ /\s/} + 1
155
- count_frag = sdiff.collect{|d| (d.action == '=') && (d.old_element =~/\s/) ? ' ' : d.action}.join.scan(/=+/).count
156
- rate_frag = count_ofrag.to_f / count_frag
153
+ frag_str = sdiff.collect do |d|
154
+ case d.action
155
+ when '='
156
+ '='
157
+ when '-'
158
+ ''
159
+ when '+'
160
+ (d.new_element =~ /\S/) ? '+' : ''
161
+ else
162
+ ''
163
+ end
164
+ end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '')
165
+
166
+ count_frag = frag_str.scan(/=+/).count
167
+ rate_frag = 1.0 / count_frag
157
168
 
158
169
  similarity = coverage * rate_frag
159
170
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.11.5'
2
+ VERSION = '0.11.6'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.5
4
+ version: 0.11.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-25 00:00:00.000000000 Z
11
+ date: 2021-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary