text_alignment 0.12.2 → 0.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c2a36fe4cfde7dfb76f554fd4afcae7cb5a03e455887621217f5e5e633b20b3
4
- data.tar.gz: f63070c6f423bc15d0fc8c742a21238a104a1b2c1d3fe56ac436effa8ef8eacf
3
+ metadata.gz: ef51eeed4c82f3ddc211a1474a35c8b61a621ed7acb6bd801a62885c2c342448
4
+ data.tar.gz: 15b1d020f78a96e152459324d921092c5bd39477f856417fd30e4283d22399ea
5
5
  SHA512:
6
- metadata.gz: '078a41bc6ab9b16e7747be6a3bb15aff4b23a1161bcea0b653a93f04d673799afcac2109cd1ce8d1a95c99c5c07d36842e3698c9f0997500e653fb4ab939e04a'
7
- data.tar.gz: ce44d334779d43b3057317537f615ebf39b8049639d3e50c4e14272c952b76a5df2b060cfd3a15f1d28372c11a795ef8bf43cb04de32b5a78ce2f44433edddfb
6
+ metadata.gz: 473a18002d40bf3db81e0f42ffe3fbcc3ff5b1281964c2d5391a57b34ff07ff973ca86e5fe34dcbe2537410b46105e05a60772ffc27ed6667443062558201952
7
+ data.tar.gz: a8ddf9b9a2bd19b1e7303fcdf83fd862bbe8b601f2de88aa59c5d51b5a7184a0ee00fde86877390267ffd3cf8619c6a52bffabd9cb5b95be4ab34d20f81e1fab
@@ -78,6 +78,34 @@ TextAlignment::CHAR_MAPPING = [
78
78
  ['"', "''"]
79
79
  ]
80
80
 
81
+ # build a string of every Unicode whitespace codepoint \s covers:
82
+ ALL_WS = [
83
+ "\u0009", # CHARACTER TABULATION
84
+ "\u000A", # LINE FEED
85
+ "\u000B", # LINE TABULATION
86
+ "\u000C", # FORM FEED
87
+ "\u000D", # CARRIAGE RETURN
88
+ "\u0020", # SPACE
89
+ "\u0085", # NEXT LINE
90
+ "\u00A0", # NO-BREAK SPACE
91
+ "\u1680", # OGHAM SPACE MARK
92
+ "\u2000", # EN QUAD
93
+ "\u2001", # EM QUAD
94
+ "\u2002", # EN SPACE
95
+ "\u2003", # EM SPACE
96
+ "\u2004", # THREE-PER-EM SPACE
97
+ "\u2005", # FOUR-PER-EM SPACE
98
+ "\u2006", # SIX-PER-EM SPACE
99
+ "\u2007", # FIGURE SPACE
100
+ "\u2008", # PUNCTUATION SPACE
101
+ "\u2009", # THIN SPACE
102
+ "\u200A", # HAIR SPACE
103
+ "\u2028", # LINE SEPARATOR
104
+ "\u2029", # PARAGRAPH SEPARATOR
105
+ "\u202F", # NARROW NO-BREAK SPACE
106
+ "\u205F", # MEDIUM MATHEMATICAL SPACE
107
+ "\u3000", # IDEOGRAPHIC SPACE
108
+ ].join
81
109
 
82
110
  class TextAlignment::CharMapping
83
111
  attr_reader :mapped_text, :index_enmap
@@ -101,11 +129,11 @@ class TextAlignment::CharMapping
101
129
  end
102
130
 
103
131
  def enmap_position(position)
104
- @index_enmap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
132
+ @index_enmap[position]
105
133
  end
106
134
 
107
135
  def demap_position(position)
108
- @index_demap[position] || raise(ArgumentError, "Unusual position of annotation: #{position}")
136
+ @index_demap[position]
109
137
  end
110
138
 
111
139
  def enmap_denotations(denotations)
@@ -113,23 +141,6 @@ class TextAlignment::CharMapping
113
141
 
114
142
  denotations.map do |d|
115
143
  d.dup.merge(span:{begin:enmap_position(d[:span][:begin]), end:enmap_position(d[:span][:end])})
116
- rescue ArgumentError => e
117
- snippet_begin = d[:span][:begin] - 5
118
- if snippet_begin < 0
119
- snippet_begin = 0
120
- end
121
- snippet_end = d[:span][:end] + 5
122
- if snippet_end > @text.length
123
- snippet_end = @text.length
124
- end
125
- snippet = @text[snippet_begin ... d[:span][:begin]] + '[' + @text[d[:span][:begin] ... d[:span][:end]] + ']' + @text[d[:span][:end] ... snippet_end]
126
- if snippet_begin > 0
127
- snippet = '...' + snippet
128
- end
129
- if snippet_end < @text.length
130
- snippet = snippet + '...'
131
- end
132
- raise ArgumentError, e.message + " (#{snippet})"
133
144
  end
134
145
  end
135
146
 
@@ -214,11 +225,15 @@ class TextAlignment::CharMapping
214
225
  end
215
226
 
216
227
  def squeeze_ws_1!(text)
217
- text.gsub!(/\s{2,}/, ' ')
228
+ # Below should have (almost) the same semantics as text.gsub!(/\s{2,}/, ' ')
229
+ non_space_ws = ALL_WS.delete(" ")
230
+ text.tr!(non_space_ws, " ")
231
+ text.squeeze!(" ")
218
232
  end
219
233
 
220
234
  def squeeze_ws_0!(text)
221
- text.gsub!(/\s+/, '')
235
+ # Below should have the same semantics as text.gsub!(/\s+/, '')
236
+ text.delete!(ALL_WS)
222
237
  end
223
238
 
224
239
  end
@@ -40,12 +40,12 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
 
42
42
  @mapped_text = @text_mapping.mapped_text
43
- denotations_mapped = @text_mapping.enmap_denotations(denotations)
44
43
 
45
44
  ## To generate the block_alignment of the input text against the reference text
46
45
  @blocks = if r = whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map)
47
46
  r
48
47
  else
48
+ denotations_mapped = @text_mapping.enmap_denotations(denotations)
49
49
  find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)
50
50
  end
51
51
 
@@ -343,7 +343,7 @@ class TextAlignment::TextAlignment
343
343
 
344
344
  ## term-based alignment
345
345
  tblocks = if denotations
346
- denotations_in_scope = denotations.select{|d| d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
346
+ denotations_in_scope = denotations.select{|d| d[:span][:begin] && d[:span][:end] && d[:span][:begin] >= b1 && d[:span][:end] <= e1}.
347
347
  sort{|d1, d2| d1[:span][:begin] <=> d2[:span][:begin] || d2[:span][:end] <=> d1[:span][:end] }.
348
348
  map{|d| d.merge(lex:str1[d[:span][:begin] ... d[:span][:end]])}
349
349
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.12.2'
2
+ VERSION = '0.12.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.2
4
+ version: 0.12.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-03 00:00:00.000000000 Z
11
+ date: 2025-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
111
  - !ruby/object:Gem::Version
112
112
  version: '0'
113
113
  requirements: []
114
- rubygems_version: 3.4.19
114
+ rubygems_version: 3.4.10
115
115
  signing_key:
116
116
  specification_version: 4
117
117
  summary: Ruby class for aligning two character strings