glaemscribe 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/glaemresources/charsets/tengwar_ds_annatar.cst +324 -218
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +329 -226
- data/glaemresources/charsets/tengwar_ds_elfica.cst +323 -221
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +331 -226
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +325 -222
- data/glaemresources/charsets/tengwar_guni_annatar.cst +274 -169
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +272 -169
- data/glaemresources/charsets/tengwar_guni_elfica.cst +276 -171
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +274 -171
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +274 -171
- data/lib/api/charset.rb +124 -57
- data/lib/api/charset_parser.rb +33 -26
- data/lib/api/post_processor/resolve_virtuals.rb +41 -19
- data/lib/api/transcription_pre_post_processor.rb +44 -41
- metadata +3 -3
data/lib/api/charset.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
@@ -24,53 +24,94 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
class Charset
|
26
26
|
attr_reader :name
|
27
|
-
|
27
|
+
|
28
28
|
attr_accessor :errors
|
29
29
|
attr_reader :chars
|
30
30
|
attr_reader :virtual_chars
|
31
|
-
|
31
|
+
attr_reader :swaps
|
32
|
+
|
33
|
+
# A swap rule declared in a charset: a trigger character plus the set of
# characters it may be exchanged with. The resolve_virtuals post-processor
# queries it through #has_target?.
class Swap
  # Line number of the swap declaration in the charset source (used in error reports).
  attr_accessor :line
  # Name of the trigger character, as written in the charset source.
  attr_accessor :trigger
  # Hash mapping every name of every resolved target character to that character.
  # Only populated once #finalize has run.
  attr_accessor :targets

  # trigger     - name of the trigger character.
  # target_list - names of the characters the trigger may be swapped with.
  def initialize(trigger, target_list)
    @trigger     = trigger
    @target_list = target_list
    @targets     = {}
  end

  # Resolves the trigger and target names against +charset+ (through
  # charset.n2c). Every name that cannot be resolved is reported by appending
  # a Glaeml::Error to charset.errors.
  #
  # Returns the resolved trigger character, or nil when the trigger is unknown.
  def finalize(charset)
    # Start from an empty table so that finalizing a second time does not
    # keep targets resolved during a previous run.
    @targets = {}

    trig = charset.n2c(@trigger)
    unless trig
      charset.errors << Glaeml::Error.new(@line, "Swap operator triggers #{@trigger} which does not exist in charset.")
    end

    @target_list.each do |target_id|
      c = charset.n2c(target_id)
      if c
        # Register the target under all of its names, so that any alias matches.
        c.names.each { |n| @targets[n] = c }
      else
        charset.errors << Glaeml::Error.new(@line, "Swap operator targets #{target_id} which does not exist in charset.")
      end
    end

    trig
  end

  # True when +tname+ is one of the names of a resolved target character.
  def has_target?(tname)
    !@targets[tname].nil?
  end
end
|
72
|
+
|
32
73
|
class Char
|
33
74
|
attr_accessor :line # Line num in the sourcecode
|
34
75
|
attr_accessor :code # Position in unicode
|
35
76
|
attr_accessor :names # Names
|
36
77
|
attr_accessor :str # How does this char resolve as a string
|
37
78
|
attr_accessor :charset # Pointer to parent charset
|
38
|
-
|
79
|
+
|
39
80
|
def initialize
|
40
81
|
@names = {}
|
41
82
|
end
|
42
|
-
|
83
|
+
|
43
84
|
def virtual?
|
44
85
|
false
|
45
86
|
end
|
46
|
-
|
87
|
+
|
47
88
|
def sequence?
|
48
89
|
false
|
49
90
|
end
|
50
91
|
end
|
51
|
-
|
52
|
-
class VirtualChar # Could have had inheritance here ...
|
92
|
+
|
93
|
+
class VirtualChar # Could have had inheritance here ...
|
53
94
|
attr_accessor :line
|
54
95
|
attr_accessor :names
|
55
96
|
attr_accessor :classes
|
56
97
|
attr_accessor :charset
|
57
98
|
attr_accessor :reversed
|
58
99
|
attr_accessor :default
|
59
|
-
|
100
|
+
|
60
101
|
class VirtualClass
|
61
102
|
attr_accessor :target
|
62
103
|
attr_accessor :triggers
|
63
104
|
end
|
64
|
-
|
105
|
+
|
65
106
|
def initialize
|
66
107
|
@classes = {} # result_char_1 => [trigger_char_1, trigger_char_2 ...] , result_char_1 => ...
|
67
108
|
@lookup_table = {}
|
68
109
|
@reversed = false
|
69
110
|
@default = nil
|
70
111
|
end
|
71
|
-
|
112
|
+
|
72
113
|
def str
|
73
|
-
|
114
|
+
|
74
115
|
# Will be called if the virtual char could not be replaced and still exists at the end of the transcription chain
|
75
116
|
if @default
|
76
117
|
@charset[@default].str
|
@@ -78,14 +119,14 @@ module Glaemscribe
|
|
78
119
|
VIRTUAL_CHAR_OUTPUT
|
79
120
|
end
|
80
121
|
end
|
81
|
-
|
122
|
+
|
82
123
|
def finalize
|
83
124
|
@lookup_table = {}
|
84
125
|
@classes.each{ |vc|
|
85
|
-
|
126
|
+
|
86
127
|
result_char = vc.target
|
87
128
|
trigger_chars = vc.triggers
|
88
|
-
|
129
|
+
|
89
130
|
trigger_chars.each{ |trigger_char|
|
90
131
|
found = @lookup_table[trigger_char]
|
91
132
|
if found
|
@@ -93,90 +134,91 @@ module Glaemscribe
|
|
93
134
|
else
|
94
135
|
rc = @charset[result_char]
|
95
136
|
tc = @charset[trigger_char]
|
96
|
-
|
137
|
+
|
97
138
|
if rc.nil?
|
98
139
|
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to unknown result char #{result_char}.")
|
99
140
|
elsif tc.nil?
|
100
|
-
@charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
|
141
|
+
@charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
|
101
142
|
elsif rc.class == VirtualChar
|
102
143
|
@charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to another virtual char #{result_char}. This is not supported!")
|
103
144
|
else
|
104
145
|
tc.names.each{|trigger_char_name| # Don't forget to match all name variants for that trigger char!
|
105
146
|
@lookup_table[trigger_char_name] = rc
|
106
|
-
}
|
107
|
-
end
|
108
|
-
end
|
147
|
+
}
|
148
|
+
end
|
149
|
+
end
|
109
150
|
}
|
110
151
|
}
|
111
152
|
if @default
|
112
153
|
c = @charset[@default]
|
113
154
|
if !c
|
114
|
-
@charset.errors << Glaeml::Error.new(@line, "Default char #{@default} does not match any real character in the charset.")
|
155
|
+
@charset.errors << Glaeml::Error.new(@line, "Default char #{@default} does not match any real character in the charset.")
|
115
156
|
elsif c.virtual?
|
116
157
|
@charset.errors << Glaeml::Error.new(@line, "Default char #{@default} is virtual, it should be real only.")
|
117
158
|
end
|
118
159
|
end
|
119
160
|
end
|
120
|
-
|
161
|
+
|
121
162
|
def [](trigger_char_name)
|
122
163
|
@lookup_table[trigger_char_name]
|
123
164
|
end
|
124
|
-
|
165
|
+
|
125
166
|
def virtual?
|
126
167
|
true
|
127
168
|
end
|
128
|
-
|
169
|
+
|
129
170
|
def sequence?
|
130
171
|
false
|
131
172
|
end
|
132
173
|
end
|
133
|
-
|
174
|
+
|
134
175
|
class SequenceChar
|
135
176
|
attr_accessor :line # Line of code
|
136
177
|
attr_accessor :names # Names
|
137
178
|
attr_accessor :sequence # The sequence of chars
|
138
179
|
attr_accessor :charset # Pointer to parent charset
|
139
|
-
|
180
|
+
|
140
181
|
def virtual?
|
141
182
|
false
|
142
183
|
end
|
143
|
-
|
184
|
+
|
144
185
|
def sequence?
|
145
186
|
true
|
146
|
-
end
|
147
|
-
|
187
|
+
end
|
188
|
+
|
148
189
|
def str
|
149
190
|
# A sequence char should never arrive unreplaced
|
150
191
|
VIRTUAL_CHAR_OUTPUT
|
151
192
|
end
|
152
|
-
|
153
|
-
def finalize
|
193
|
+
|
194
|
+
def finalize
|
154
195
|
if @sequence.count == 0
|
155
|
-
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
196
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
156
197
|
end
|
157
|
-
|
198
|
+
|
158
199
|
@sequence.each{ |symbol|
|
159
200
|
# Check that the sequence is correct
|
160
201
|
found = @charset[symbol]
|
161
202
|
if !found
|
162
203
|
@charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
|
163
204
|
end
|
164
|
-
}
|
205
|
+
}
|
165
206
|
end
|
166
|
-
|
207
|
+
|
167
208
|
end
|
168
|
-
|
209
|
+
|
169
210
|
def initialize(name)
|
170
211
|
@name = name
|
171
212
|
@chars = []
|
172
213
|
@errors = []
|
173
214
|
@virtual_chars = []
|
215
|
+
@swaps = []
|
174
216
|
end
|
175
|
-
|
217
|
+
|
176
218
|
# Pass integer (utf8 num) and array (of strings)
|
177
219
|
def add_char(line, code, names)
|
178
220
|
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
179
|
-
|
221
|
+
|
180
222
|
c = Char.new
|
181
223
|
c.line = line
|
182
224
|
c.code = code
|
@@ -185,10 +227,10 @@ module Glaemscribe
|
|
185
227
|
c.charset = self
|
186
228
|
@chars << c
|
187
229
|
end
|
188
|
-
|
230
|
+
|
189
231
|
def add_virtual_char(line, classes, names, reversed = false, default = nil)
|
190
232
|
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
191
|
-
|
233
|
+
|
192
234
|
c = VirtualChar.new
|
193
235
|
c.line = line
|
194
236
|
c.names = names
|
@@ -196,25 +238,34 @@ module Glaemscribe
|
|
196
238
|
c.charset = self
|
197
239
|
c.reversed = reversed
|
198
240
|
c.default = default
|
199
|
-
@chars << c
|
241
|
+
@chars << c
|
200
242
|
end
|
201
|
-
|
243
|
+
|
202
244
|
def add_sequence_char(line, names, seq)
|
203
245
|
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
204
|
-
|
246
|
+
|
205
247
|
c = SequenceChar.new
|
206
248
|
c.line = line
|
207
249
|
c.names = names
|
208
|
-
c.sequence = seq.split.reject{|token| token.empty? }
|
250
|
+
c.sequence = seq.split.reject{|token| token.empty? }
|
209
251
|
c.charset = self
|
210
252
|
@chars << c
|
211
253
|
end
|
212
|
-
|
254
|
+
|
255
|
+
# Registers a swap rule on this charset.
#
# line     - line number of the swap declaration in the charset source.
# target   - name of the character that triggers the swap. Despite the
#            parameter name, it is handed to Swap.new as the trigger.
# triggers - names of the characters the trigger may be swapped with. They
#            are handed to Swap.new as the target list.
#
# A declaration lacking either part (nil or empty) is silently ignored.
def add_swap(line, target, triggers)
  # The nil checks matter: the first argument of a swap element is nil when
  # the element was written without arguments.
  return if target.nil? || target.empty? || triggers.nil? || triggers.empty?

  swap = Swap.new(target, triggers)
  swap.line = line
  @swaps << swap
end
|
262
|
+
|
213
263
|
def finalize
|
214
264
|
@errors = []
|
215
265
|
@lookup_table = {}
|
216
266
|
@virtual_chars = [] # A convenient filtered array
|
217
|
-
|
267
|
+
@swap_lookup = {}
|
268
|
+
|
218
269
|
@chars.each { |c|
|
219
270
|
c.names.each { |cname|
|
220
271
|
found = @lookup_table[cname]
|
@@ -225,27 +276,43 @@ module Glaemscribe
|
|
225
276
|
end
|
226
277
|
}
|
227
278
|
}
|
228
|
-
|
279
|
+
|
229
280
|
@chars.each{ |c|
|
230
281
|
if c.class == VirtualChar
|
231
282
|
c.finalize
|
232
283
|
@virtual_chars << c
|
233
284
|
end
|
234
285
|
}
|
235
|
-
|
286
|
+
|
236
287
|
@chars.each{|c|
|
237
288
|
if c.class == SequenceChar
|
238
289
|
c.finalize
|
239
290
|
end
|
240
291
|
}
|
241
|
-
|
292
|
+
|
293
|
+
@swaps.each{ |s|
|
294
|
+
trig = s.finalize(self)
|
295
|
+
if trig
|
296
|
+
trig.names.each{ |n|
|
297
|
+
@swap_lookup[n] = s
|
298
|
+
}
|
299
|
+
end
|
300
|
+
}
|
242
301
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
243
302
|
end
|
244
|
-
|
303
|
+
|
245
304
|
def [](symbol)
|
246
305
|
@lookup_table[symbol]
|
247
306
|
end
|
248
|
-
|
307
|
+
|
308
|
+
# Returns whatever this charset's [] lookup returns for +symbol+
# (nil when the name is unknown to the lookup table).
def n2c(symbol)
  self.[](symbol)
end
|
311
|
+
|
312
|
+
# Returns the swap rule registered for the character named +trigger_name+,
# or nil when there is none.
#
# The lookup table is only built by #finalize; before that no rule can be
# found, so nil is returned instead of failing on the missing table.
def swap_for_trigger(trigger_name)
  return nil unless @swap_lookup

  @swap_lookup[trigger_name]
end
|
315
|
+
|
249
316
|
end
|
250
317
|
end
|
251
|
-
end
|
318
|
+
end
|
data/lib/api/charset_parser.rb
CHANGED
@@ -1,37 +1,37 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
-
|
25
|
+
|
26
26
|
class CharsetParser
|
27
|
-
|
27
|
+
|
28
28
|
def initialize()
|
29
29
|
@charset = nil
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def parse(file_path)
|
33
|
-
@charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))
|
34
|
-
|
33
|
+
@charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))
|
34
|
+
|
35
35
|
raw = File.open(file_path,"rb:utf-8").read
|
36
36
|
doc = Glaeml::Parser.new.parse(raw)
|
37
37
|
|
@@ -39,33 +39,40 @@ module Glaemscribe
|
|
39
39
|
@charset.errors = doc.errors
|
40
40
|
return @charset
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
# TODO : verify charset glaeml like we do with modes
|
44
|
-
|
44
|
+
|
45
45
|
doc.root_node.gpath("char").each { |char_element|
|
46
46
|
code = char_element.args[0].hex
|
47
47
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
48
48
|
@charset.add_char(char_element.line,code,names)
|
49
49
|
}
|
50
|
-
|
51
|
-
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
50
|
+
|
51
|
+
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
52
52
|
names = seq_elemnt.args
|
53
53
|
child_node = seq_elemnt.children.first
|
54
54
|
seq = (child_node && child_node.text?)?(child_node.args.first):("")
|
55
55
|
@charset.add_sequence_char(seq_elemnt.line,names,seq)
|
56
56
|
}
|
57
|
-
|
57
|
+
|
58
|
+
doc.root_node.gpath("swap").each{ |element|
|
59
|
+
trigger_one = element.args.first
|
60
|
+
text_lines = element.children.select{ |c| c.text? }.map{ |c| c.args.first }
|
61
|
+
second_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
|
62
|
+
@charset.add_swap(element.line, trigger_one, second_triggers)
|
63
|
+
}
|
64
|
+
|
58
65
|
doc.root_node.gpath("virtual").each { |virtual_element|
|
59
66
|
names = virtual_element.args
|
60
|
-
reversed = false
|
67
|
+
reversed = false
|
61
68
|
default = nil
|
62
69
|
classes = []
|
63
|
-
|
70
|
+
|
64
71
|
virtual_element.gpath("class").each { |class_element|
|
65
72
|
vc = Charset::VirtualChar::VirtualClass.new
|
66
73
|
vc.target = class_element.args[0]
|
67
74
|
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
68
|
-
|
75
|
+
|
69
76
|
# Allow triggers to be defined inside the body of the class element
|
70
77
|
text_lines = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
|
71
78
|
inner_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
|
@@ -73,21 +80,21 @@ module Glaemscribe
|
|
73
80
|
|
74
81
|
classes << vc
|
75
82
|
}
|
76
|
-
virtual_element.gpath("reversed").each { |reversed_element|
|
83
|
+
virtual_element.gpath("reversed").each { |reversed_element|
|
77
84
|
reversed = true
|
78
85
|
}
|
79
|
-
virtual_element.gpath("default").each { |default_element|
|
86
|
+
virtual_element.gpath("default").each { |default_element|
|
80
87
|
default = default_element.args[0]
|
81
88
|
}
|
82
|
-
|
89
|
+
|
83
90
|
@charset.add_virtual_char(virtual_element.line,classes,names,reversed,default)
|
84
91
|
}
|
85
|
-
|
92
|
+
|
86
93
|
@charset.finalize
|
87
|
-
|
88
|
-
@charset
|
94
|
+
|
95
|
+
@charset
|
89
96
|
end
|
90
|
-
|
97
|
+
|
91
98
|
end
|
92
99
|
end
|
93
100
|
end
|
@@ -1,22 +1,22 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
@@ -24,25 +24,25 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
|
26
26
|
class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
|
27
|
-
|
27
|
+
|
28
28
|
def finalize(trans_options)
|
29
29
|
super(trans_options)
|
30
30
|
@last_triggers = {} # Allocate the lookup here to optimize
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def reset_trigger_states(charset)
|
34
34
|
# For each virtual char in charset, maintain a state.
|
35
35
|
charset.virtual_chars.each{ |vc|
|
36
36
|
@last_triggers[vc] = nil # Clear the state
|
37
37
|
}
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
|
41
41
|
if token == '*SPACE' || token =='*LF'
|
42
42
|
reset_trigger_states(charset)
|
43
43
|
return
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
# Check if token is a virtual char
|
47
47
|
c = charset[token]
|
48
48
|
return if c.nil? # May happen for empty tokens
|
@@ -54,14 +54,14 @@ module Glaemscribe
|
|
54
54
|
token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
|
55
55
|
end
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
# Update states of virtual classes
|
59
59
|
charset.virtual_chars.each{|vc|
|
60
60
|
rc = vc[token]
|
61
|
-
@last_triggers[vc] = rc if rc != nil
|
61
|
+
@last_triggers[vc] = rc if rc != nil
|
62
62
|
}
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
def apply_sequences(charset,tokens)
|
66
66
|
ret = []
|
67
67
|
tokens.each { |token|
|
@@ -74,21 +74,43 @@ module Glaemscribe
|
|
74
74
|
}
|
75
75
|
ret
|
76
76
|
end
|
77
|
-
|
77
|
+
|
78
|
+
# Applies the charset's swap rules to +tokens+.
#
# Walks the token list left to right; whenever a token is the trigger of a
# swap rule and the token right after it is one of that rule's targets, the
# two are exchanged. Because the trigger moves one slot to the right and the
# walk continues from there, a trigger is carried past a whole run of
# consecutive targets.
#
# charset - object answering swap_for_trigger(name) with a swap rule or nil.
# tokens  - array of token names; it is modified in place.
#
# Returns the same +tokens+ array.
def apply_swaps(charset, tokens)
  # The last token has no successor, hence the exclusive upper bound.
  (0...tokens.length - 1).each do |idx|
    current   = tokens[idx]
    following = tokens[idx + 1]

    swap = charset.swap_for_trigger(current)
    next unless swap && swap.has_target?(following)

    tokens[idx], tokens[idx + 1] = following, current
  end

  tokens
end
|
97
|
+
|
78
98
|
def apply(tokens,charset)
|
79
99
|
# Apply sequence chars
|
80
100
|
tokens = apply_sequences(charset,tokens)
|
81
|
-
|
101
|
+
|
102
|
+
tokens = apply_swaps(charset, tokens)
|
103
|
+
|
82
104
|
# Clone the tokens so that we can perform ligatures AND diacritics without interferences
|
83
105
|
new_tokens = tokens.clone
|
84
|
-
|
106
|
+
|
85
107
|
# Handle l to r virtuals (diacritics ?)
|
86
|
-
reset_trigger_states(charset)
|
108
|
+
reset_trigger_states(charset)
|
87
109
|
tokens.each_with_index{ |token,idx|
|
88
110
|
apply_loop(charset,tokens,new_tokens,false,token,idx)
|
89
111
|
}
|
90
112
|
# Handle r to l virtuals (ligatures ?)
|
91
|
-
reset_trigger_states(charset)
|
113
|
+
reset_trigger_states(charset)
|
92
114
|
tokens.reverse_each.with_index{ |token,idx|
|
93
115
|
apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
|
94
116
|
}
|
@@ -96,7 +118,7 @@ module Glaemscribe
|
|
96
118
|
end
|
97
119
|
end
|
98
120
|
|
99
|
-
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
121
|
+
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
100
122
|
|
101
123
|
end
|
102
124
|
end
|