glaemscribe 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/api/charset.rb CHANGED
@@ -1,22 +1,22 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
@@ -24,53 +24,94 @@ module Glaemscribe
24
24
  module API
25
25
  class Charset
26
26
  attr_reader :name
27
-
27
+
28
28
  attr_accessor :errors
29
29
  attr_reader :chars
30
30
  attr_reader :virtual_chars
31
-
31
+ attr_reader :swaps
32
+
33
+ class Swap
34
+ attr_accessor :line
35
+ attr_accessor :trigger
36
+ attr_accessor :targets
37
+
38
+ def initialize(trigger, target_list)
39
+ @trigger = trigger
40
+ @targets = {}
41
+
42
+ @target_list = target_list
43
+ end
44
+
45
+ def finalize(charset)
46
+ @lookup_table = {}
47
+
48
+ trig = charset.n2c(@trigger)
49
+
50
+ if !trig
51
+ charset.errors << Glaeml::Error.new(@line, "Swap operator triggers #{@trigger} which does not exist in charset.")
52
+ end
53
+
54
+ @target_list.each{ |target_id|
55
+ c = charset.n2c(target_id)
56
+ if !c
57
+ charset.errors << Glaeml::Error.new(@line, "Swap operator targets #{target_id} which does not exist in charset.")
58
+ else
59
+ c.names.each{ |n|
60
+ @targets[n] = c
61
+ }
62
+ end
63
+ }
64
+
65
+ trig
66
+ end
67
+
68
+ def has_target?(tname)
69
+ (@targets[tname] != nil)
70
+ end
71
+ end
72
+
32
73
  class Char
33
74
  attr_accessor :line # Line num in the sourcecode
34
75
  attr_accessor :code # Position in unicode
35
76
  attr_accessor :names # Names
36
77
  attr_accessor :str # How does this char resolve as a string
37
78
  attr_accessor :charset # Pointer to parent charset
38
-
79
+
39
80
  def initialize
40
81
  @names = {}
41
82
  end
42
-
83
+
43
84
  def virtual?
44
85
  false
45
86
  end
46
-
87
+
47
88
  def sequence?
48
89
  false
49
90
  end
50
91
  end
51
-
52
- class VirtualChar # Could have had inheritance here ...
92
+
93
+ class VirtualChar # Could have had inheritance here ...
53
94
  attr_accessor :line
54
95
  attr_accessor :names
55
96
  attr_accessor :classes
56
97
  attr_accessor :charset
57
98
  attr_accessor :reversed
58
99
  attr_accessor :default
59
-
100
+
60
101
  class VirtualClass
61
102
  attr_accessor :target
62
103
  attr_accessor :triggers
63
104
  end
64
-
105
+
65
106
  def initialize
66
107
  @classes = {} # result_char_1 => [trigger_char_1, trigger_char_2 ...] , result_char_1 => ...
67
108
  @lookup_table = {}
68
109
  @reversed = false
69
110
  @default = nil
70
111
  end
71
-
112
+
72
113
  def str
73
-
114
+
74
115
  # Will be called if the virtual char could not be replaced and still exists at the end of the transcription chain
75
116
  if @default
76
117
  @charset[@default].str
@@ -78,14 +119,14 @@ module Glaemscribe
78
119
  VIRTUAL_CHAR_OUTPUT
79
120
  end
80
121
  end
81
-
122
+
82
123
  def finalize
83
124
  @lookup_table = {}
84
125
  @classes.each{ |vc|
85
-
126
+
86
127
  result_char = vc.target
87
128
  trigger_chars = vc.triggers
88
-
129
+
89
130
  trigger_chars.each{ |trigger_char|
90
131
  found = @lookup_table[trigger_char]
91
132
  if found
@@ -93,90 +134,91 @@ module Glaemscribe
93
134
  else
94
135
  rc = @charset[result_char]
95
136
  tc = @charset[trigger_char]
96
-
137
+
97
138
  if rc.nil?
98
139
  @charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to unknown result char #{result_char}.")
99
140
  elsif tc.nil?
100
- @charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
141
+ @charset.errors << Glaeml::Error.new(@line, "Unknown trigger char #{trigger_char}.")
101
142
  elsif rc.class == VirtualChar
102
143
  @charset.errors << Glaeml::Error.new(@line, "Trigger char #{trigger_char} points to another virtual char #{result_char}. This is not supported!")
103
144
  else
104
145
  tc.names.each{|trigger_char_name| # Don't forget to match all name variants for that trigger char!
105
146
  @lookup_table[trigger_char_name] = rc
106
- }
107
- end
108
- end
147
+ }
148
+ end
149
+ end
109
150
  }
110
151
  }
111
152
  if @default
112
153
  c = @charset[@default]
113
154
  if !c
114
- @charset.errors << Glaeml::Error.new(@line, "Default char #{@default} does not match any real character in the charset.")
155
+ @charset.errors << Glaeml::Error.new(@line, "Default char #{@default} does not match any real character in the charset.")
115
156
  elsif c.virtual?
116
157
  @charset.errors << Glaeml::Error.new(@line, "Default char #{@default} is virtual, it should be real only.")
117
158
  end
118
159
  end
119
160
  end
120
-
161
+
121
162
  def [](trigger_char_name)
122
163
  @lookup_table[trigger_char_name]
123
164
  end
124
-
165
+
125
166
  def virtual?
126
167
  true
127
168
  end
128
-
169
+
129
170
  def sequence?
130
171
  false
131
172
  end
132
173
  end
133
-
174
+
134
175
  class SequenceChar
135
176
  attr_accessor :line # Line of code
136
177
  attr_accessor :names # Names
137
178
  attr_accessor :sequence # The sequence of chars
138
179
  attr_accessor :charset # Pointer to parent charset
139
-
180
+
140
181
  def virtual?
141
182
  false
142
183
  end
143
-
184
+
144
185
  def sequence?
145
186
  true
146
- end
147
-
187
+ end
188
+
148
189
  def str
149
190
  # A sequence char should never arrive unreplaced
150
191
  VIRTUAL_CHAR_OUTPUT
151
192
  end
152
-
153
- def finalize
193
+
194
+ def finalize
154
195
  if @sequence.count == 0
155
- @charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
196
+ @charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
156
197
  end
157
-
198
+
158
199
  @sequence.each{ |symbol|
159
200
  # Check that the sequence is correct
160
201
  found = @charset[symbol]
161
202
  if !found
162
203
  @charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
163
204
  end
164
- }
205
+ }
165
206
  end
166
-
207
+
167
208
  end
168
-
209
+
169
210
  def initialize(name)
170
211
  @name = name
171
212
  @chars = []
172
213
  @errors = []
173
214
  @virtual_chars = []
215
+ @swaps = []
174
216
  end
175
-
217
+
176
218
  # Pass integer (utf8 num) and array (of strings)
177
219
  def add_char(line, code, names)
178
220
  return if names.empty? || names.include?("?") # Ignore characters with '?'
179
-
221
+
180
222
  c = Char.new
181
223
  c.line = line
182
224
  c.code = code
@@ -185,10 +227,10 @@ module Glaemscribe
185
227
  c.charset = self
186
228
  @chars << c
187
229
  end
188
-
230
+
189
231
  def add_virtual_char(line, classes, names, reversed = false, default = nil)
190
232
  return if names.empty? || names.include?("?") # Ignore characters with '?'
191
-
233
+
192
234
  c = VirtualChar.new
193
235
  c.line = line
194
236
  c.names = names
@@ -196,25 +238,34 @@ module Glaemscribe
196
238
  c.charset = self
197
239
  c.reversed = reversed
198
240
  c.default = default
199
- @chars << c
241
+ @chars << c
200
242
  end
201
-
243
+
202
244
  def add_sequence_char(line, names, seq)
203
245
  return if names.empty? || names.include?("?") # Ignore characters with '?'
204
-
246
+
205
247
  c = SequenceChar.new
206
248
  c.line = line
207
249
  c.names = names
208
- c.sequence = seq.split.reject{|token| token.empty? }
250
+ c.sequence = seq.split.reject{|token| token.empty? }
209
251
  c.charset = self
210
252
  @chars << c
211
253
  end
212
-
254
+
255
+ def add_swap(line, target, triggers)
256
+ return if target.empty? || triggers.empty?
257
+
258
+ s = Swap.new(target, triggers)
259
+ s.line = line
260
+ @swaps << s
261
+ end
262
+
213
263
  def finalize
214
264
  @errors = []
215
265
  @lookup_table = {}
216
266
  @virtual_chars = [] # A convenient filtered array
217
-
267
+ @swap_lookup = {}
268
+
218
269
  @chars.each { |c|
219
270
  c.names.each { |cname|
220
271
  found = @lookup_table[cname]
@@ -225,27 +276,43 @@ module Glaemscribe
225
276
  end
226
277
  }
227
278
  }
228
-
279
+
229
280
  @chars.each{ |c|
230
281
  if c.class == VirtualChar
231
282
  c.finalize
232
283
  @virtual_chars << c
233
284
  end
234
285
  }
235
-
286
+
236
287
  @chars.each{|c|
237
288
  if c.class == SequenceChar
238
289
  c.finalize
239
290
  end
240
291
  }
241
-
292
+
293
+ @swaps.each{ |s|
294
+ trig = s.finalize(self)
295
+ if trig
296
+ trig.names.each{ |n|
297
+ @swap_lookup[n] = s
298
+ }
299
+ end
300
+ }
242
301
  API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
243
302
  end
244
-
303
+
245
304
  def [](symbol)
246
305
  @lookup_table[symbol]
247
306
  end
248
-
307
+
308
+ def n2c(symbol)
309
+ self[symbol]
310
+ end
311
+
312
+ def swap_for_trigger(trigger_name)
313
+ @swap_lookup[trigger_name]
314
+ end
315
+
249
316
  end
250
317
  end
251
- end
318
+ end
@@ -1,37 +1,37 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
23
23
  module Glaemscribe
24
24
  module API
25
-
25
+
26
26
  class CharsetParser
27
-
27
+
28
28
  def initialize()
29
29
  @charset = nil
30
30
  end
31
-
31
+
32
32
  def parse(file_path)
33
- @charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))
34
-
33
+ @charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))
34
+
35
35
  raw = File.open(file_path,"rb:utf-8").read
36
36
  doc = Glaeml::Parser.new.parse(raw)
37
37
 
@@ -39,33 +39,40 @@ module Glaemscribe
39
39
  @charset.errors = doc.errors
40
40
  return @charset
41
41
  end
42
-
42
+
43
43
  # TODO : verify charset glaeml like we do with modes
44
-
44
+
45
45
  doc.root_node.gpath("char").each { |char_element|
46
46
  code = char_element.args[0].hex
47
47
  names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
48
48
  @charset.add_char(char_element.line,code,names)
49
49
  }
50
-
51
- doc.root_node.gpath("seq").each{ |seq_elemnt|
50
+
51
+ doc.root_node.gpath("seq").each{ |seq_elemnt|
52
52
  names = seq_elemnt.args
53
53
  child_node = seq_elemnt.children.first
54
54
  seq = (child_node && child_node.text?)?(child_node.args.first):("")
55
55
  @charset.add_sequence_char(seq_elemnt.line,names,seq)
56
56
  }
57
-
57
+
58
+ doc.root_node.gpath("swap").each{ |element|
59
+ trigger_one = element.args.first
60
+ text_lines = element.children.select{ |c| c.text? }.map{ |c| c.args.first }
61
+ second_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
62
+ @charset.add_swap(element.line, trigger_one, second_triggers)
63
+ }
64
+
58
65
  doc.root_node.gpath("virtual").each { |virtual_element|
59
66
  names = virtual_element.args
60
- reversed = false
67
+ reversed = false
61
68
  default = nil
62
69
  classes = []
63
-
70
+
64
71
  virtual_element.gpath("class").each { |class_element|
65
72
  vc = Charset::VirtualChar::VirtualClass.new
66
73
  vc.target = class_element.args[0]
67
74
  vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
68
-
75
+
69
76
  # Allow triggers to be defined inside the body of the class element
70
77
  text_lines = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
71
78
  inner_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
@@ -73,21 +80,21 @@ module Glaemscribe
73
80
 
74
81
  classes << vc
75
82
  }
76
- virtual_element.gpath("reversed").each { |reversed_element|
83
+ virtual_element.gpath("reversed").each { |reversed_element|
77
84
  reversed = true
78
85
  }
79
- virtual_element.gpath("default").each { |default_element|
86
+ virtual_element.gpath("default").each { |default_element|
80
87
  default = default_element.args[0]
81
88
  }
82
-
89
+
83
90
  @charset.add_virtual_char(virtual_element.line,classes,names,reversed,default)
84
91
  }
85
-
92
+
86
93
  @charset.finalize
87
-
88
- @charset
94
+
95
+ @charset
89
96
  end
90
-
97
+
91
98
  end
92
99
  end
93
100
  end
@@ -1,22 +1,22 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
@@ -24,25 +24,25 @@ module Glaemscribe
24
24
  module API
25
25
 
26
26
  class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
27
-
27
+
28
28
  def finalize(trans_options)
29
29
  super(trans_options)
30
30
  @last_triggers = {} # Allocate the lookup here to optimize
31
31
  end
32
-
32
+
33
33
  def reset_trigger_states(charset)
34
34
  # For each virtual char in charset, maintain a state.
35
35
  charset.virtual_chars.each{ |vc|
36
36
  @last_triggers[vc] = nil # Clear the state
37
37
  }
38
38
  end
39
-
39
+
40
40
  def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
41
41
  if token == '*SPACE' || token =='*LF'
42
42
  reset_trigger_states(charset)
43
43
  return
44
44
  end
45
-
45
+
46
46
  # Check if token is a virtual char
47
47
  c = charset[token]
48
48
  return if c.nil? # May happen for empty tokens
@@ -54,14 +54,14 @@ module Glaemscribe
54
54
  token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
55
55
  end
56
56
  end
57
-
57
+
58
58
  # Update states of virtual classes
59
59
  charset.virtual_chars.each{|vc|
60
60
  rc = vc[token]
61
- @last_triggers[vc] = rc if rc != nil
61
+ @last_triggers[vc] = rc if rc != nil
62
62
  }
63
63
  end
64
-
64
+
65
65
  def apply_sequences(charset,tokens)
66
66
  ret = []
67
67
  tokens.each { |token|
@@ -74,21 +74,43 @@ module Glaemscribe
74
74
  }
75
75
  ret
76
76
  end
77
-
77
+
78
+ def apply_swaps(charset, tokens)
79
+
80
+ idx = 0
81
+ while idx < tokens.length - 1
82
+ tok = tokens[idx]
83
+ tgt = tokens[idx+1]
84
+
85
+ trig = charset.swap_for_trigger(tok)
86
+
87
+ if trig && trig.has_target?(tgt)
88
+ tokens[idx+1] = tok
89
+ tokens[idx] = tgt
90
+ end
91
+
92
+ idx += 1
93
+ end
94
+
95
+ tokens
96
+ end
97
+
78
98
  def apply(tokens,charset)
79
99
  # Apply sequence chars
80
100
  tokens = apply_sequences(charset,tokens)
81
-
101
+
102
+ tokens = apply_swaps(charset, tokens)
103
+
82
104
  # Clone the tokens so that we can perform ligatures AND diacritics without interferences
83
105
  new_tokens = tokens.clone
84
-
106
+
85
107
  # Handle l to r virtuals (diacritics ?)
86
- reset_trigger_states(charset)
108
+ reset_trigger_states(charset)
87
109
  tokens.each_with_index{ |token,idx|
88
110
  apply_loop(charset,tokens,new_tokens,false,token,idx)
89
111
  }
90
112
  # Handle r to l virtuals (ligatures ?)
91
- reset_trigger_states(charset)
113
+ reset_trigger_states(charset)
92
114
  tokens.reverse_each.with_index{ |token,idx|
93
115
  apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
94
116
  }
@@ -96,7 +118,7 @@ module Glaemscribe
96
118
  end
97
119
  end
98
120
 
99
- ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
121
+ ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
100
122
 
101
123
  end
102
124
  end