glaemscribe 1.2.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset.rb +124 -57
  24. data/lib/api/charset_parser.rb +39 -26
  25. data/lib/api/mode.rb +35 -10
  26. data/lib/api/mode_parser.rb +21 -12
  27. data/lib/api/post_processor/outspace.rb +44 -0
  28. data/lib/api/post_processor/resolve_virtuals.rb +41 -19
  29. data/lib/api/rule_group.rb +1 -1
  30. data/lib/api/transcription_pre_post_processor.rb +51 -45
  31. data/lib/api/transcription_processor.rb +12 -9
  32. data/lib/glaemscribe.rb +2 -0
  33. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  34. data/lib_espeak/glaemscribe_tts.js +363 -223
  35. metadata +12 -6
@@ -0,0 +1,44 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ # A post processor operator to replace the out_space on the fly.
24
+ # This has the same effect as the \outspace parameter,
25
+ # but can be included in the postprocessor and benefit from the if/then logic.
26
+
27
+ module Glaemscribe
28
+ module API
29
+
30
+ class OutspacePostProcessorOperator < PostProcessorOperator
31
+ def initialize(mode, glaeml_element)
32
+ super(mode, glaeml_element)
33
+ @out_space = @mode.post_processor.out_space = glaeml_element.args[0].split.reject{|token| token.empty? }
34
+ end
35
+
36
+ def apply(tokens, charset)
37
+ @mode.post_processor.out_space = @out_space
38
+ tokens
39
+ end
40
+ end
41
+
42
+ ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
43
+ end
44
+ end
@@ -1,22 +1,22 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
@@ -24,25 +24,25 @@ module Glaemscribe
24
24
  module API
25
25
 
26
26
  class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
27
-
27
+
28
28
  def finalize(trans_options)
29
29
  super(trans_options)
30
30
  @last_triggers = {} # Allocate the lookup here to optimize
31
31
  end
32
-
32
+
33
33
  def reset_trigger_states(charset)
34
34
  # For each virtual char in charset, maintain a state.
35
35
  charset.virtual_chars.each{ |vc|
36
36
  @last_triggers[vc] = nil # Clear the state
37
37
  }
38
38
  end
39
-
39
+
40
40
  def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
41
41
  if token == '*SPACE' || token =='*LF'
42
42
  reset_trigger_states(charset)
43
43
  return
44
44
  end
45
-
45
+
46
46
  # Check if token is a virtual char
47
47
  c = charset[token]
48
48
  return if c.nil? # May happen for empty tokens
@@ -54,14 +54,14 @@ module Glaemscribe
54
54
  token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
55
55
  end
56
56
  end
57
-
57
+
58
58
  # Update states of virtual classes
59
59
  charset.virtual_chars.each{|vc|
60
60
  rc = vc[token]
61
- @last_triggers[vc] = rc if rc != nil
61
+ @last_triggers[vc] = rc if rc != nil
62
62
  }
63
63
  end
64
-
64
+
65
65
  def apply_sequences(charset,tokens)
66
66
  ret = []
67
67
  tokens.each { |token|
@@ -74,21 +74,43 @@ module Glaemscribe
74
74
  }
75
75
  ret
76
76
  end
77
-
77
+
78
+ def apply_swaps(charset, tokens)
79
+
80
+ idx = 0
81
+ while idx < tokens.length - 1
82
+ tok = tokens[idx]
83
+ tgt = tokens[idx+1]
84
+
85
+ trig = charset.swap_for_trigger(tok)
86
+
87
+ if trig && trig.has_target?(tgt)
88
+ tokens[idx+1] = tok
89
+ tokens[idx] = tgt
90
+ end
91
+
92
+ idx += 1
93
+ end
94
+
95
+ tokens
96
+ end
97
+
78
98
  def apply(tokens,charset)
79
99
  # Apply sequence chars
80
100
  tokens = apply_sequences(charset,tokens)
81
-
101
+
102
+ tokens = apply_swaps(charset, tokens)
103
+
82
104
  # Clone the tokens so that we can perform ligatures AND diacritics without interferences
83
105
  new_tokens = tokens.clone
84
-
106
+
85
107
  # Handle l to r virtuals (diacritics ?)
86
- reset_trigger_states(charset)
108
+ reset_trigger_states(charset)
87
109
  tokens.each_with_index{ |token,idx|
88
110
  apply_loop(charset,tokens,new_tokens,false,token,idx)
89
111
  }
90
112
  # Handle r to l virtuals (ligatures ?)
91
- reset_trigger_states(charset)
113
+ reset_trigger_states(charset)
92
114
  tokens.reverse_each.with_index{ |token,idx|
93
115
  apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
94
116
  }
@@ -96,7 +118,7 @@ module Glaemscribe
96
118
  end
97
119
  end
98
120
 
99
- ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
121
+ ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
100
122
 
101
123
  end
102
124
  end
@@ -138,7 +138,7 @@ module Glaemscribe
138
138
  var_value = apply_vars(term.line, var_value_ex, true)
139
139
 
140
140
  if !var_value
141
- @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{var_name}} could not be declared.")
141
+ @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
142
142
  end
143
143
  end
144
144
 
@@ -1,45 +1,46 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
23
23
  module Glaemscribe
24
24
  module API
25
-
25
+
26
26
  class PrePostProcessorOperator
27
27
  attr_reader :glaeml_element
28
28
  attr_reader :finalized_glaeml_element
29
-
30
- def initialize(glaeml_element)
29
+
30
+ def initialize(mode, glaeml_element)
31
+ @mode = mode
31
32
  @glaeml_element = glaeml_element
32
33
  end
33
-
34
+
34
35
  def eval_arg(arg, trans_options)
35
36
  return nil if arg.nil?
36
37
  if arg =~ /^\\eval\s/
37
38
  to_eval = $'
38
39
  return Eval::Parser.new().parse(to_eval, trans_options)
39
40
  end
40
- return arg
41
+ return arg
41
42
  end
42
-
43
+
43
44
  def finalize_glaeml_element(ge, trans_options)
44
45
  ge.args.map! { |arg| eval_arg(arg, trans_options) }
45
46
  ge.children.each{ |child|
@@ -47,37 +48,37 @@ module Glaemscribe
47
48
  }
48
49
  ge
49
50
  end
50
-
51
+
51
52
  def finalize(trans_options)
52
53
  @finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
53
54
  end
54
-
55
+
55
56
  def apply
56
57
  raise "Pure virtual method, should be overloaded."
57
58
  end
58
59
  end
59
-
60
+
60
61
  class TranscriptionPrePostProcessor
61
62
  attr_reader :root_code_block
62
-
63
+
63
64
  attr_reader :operators
64
-
65
+
65
66
  def initialize(mode)
66
67
  @mode = mode
67
- @root_code_block = IfTree::CodeBlock.new
68
+ @root_code_block = IfTree::CodeBlock.new
68
69
  end
69
-
70
+
70
71
  def descend_if_tree(code_block, trans_options)
71
- code_block.terms.each{ |term|
72
+ code_block.terms.each{ |term|
72
73
  if(term.is_pre_post_processor_operators?)
73
74
  term.operators.each{ |operator|
74
75
  @operators << operator
75
- }
76
+ }
76
77
  else
77
78
  term.if_conds.each{ |if_cond|
78
-
79
+
79
80
  if_eval = Eval::Parser.new()
80
-
81
+
81
82
  if(if_eval.parse(if_cond.expression, trans_options) == true)
82
83
  descend_if_tree(if_cond.child_code_block, trans_options)
83
84
  break
@@ -86,7 +87,7 @@ module Glaemscribe
86
87
  end
87
88
  }
88
89
  end
89
-
90
+
90
91
  def finalize(trans_options)
91
92
  @operators = []
92
93
  # Select operators depending on conditions
@@ -98,42 +99,47 @@ module Glaemscribe
98
99
  end
99
100
 
100
101
  end
101
-
102
- class PreProcessorOperator < PrePostProcessorOperator
102
+
103
+ class PreProcessorOperator < PrePostProcessorOperator
103
104
  end
104
-
105
+
105
106
  class PostProcessorOperator < PrePostProcessorOperator
106
107
  end
107
-
108
- class TranscriptionPreProcessor < TranscriptionPrePostProcessor
108
+
109
+ class TranscriptionPreProcessor < TranscriptionPrePostProcessor
109
110
  # Apply all preprocessor rules consecutively
110
111
  def apply(l)
111
112
  ret = l
112
113
  @operators.each{ |operator|
113
114
  ret = operator.apply(ret)
114
- }
115
+ }
115
116
  ret
116
117
  end
117
118
  end
118
-
119
+
119
120
  class TranscriptionPostProcessor < TranscriptionPrePostProcessor
120
-
121
+
121
122
  attr_accessor :out_space
122
-
123
+
123
124
  def apply(tokens, out_charset)
124
-
125
- out_space_str = " "
126
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
127
-
125
+
126
+ # Clean up the output of the chain by removing empty tokens
127
+ tokens.select!{ |tok| tok != "" }
128
+
128
129
  # Apply filters
129
130
  @operators.each{ |operator|
130
131
  tokens = operator.apply(tokens,out_charset)
131
- }
132
-
132
+ }
133
+
134
+ out_space_str = " "
135
+ out_space_str = @out_space.map{ |token|
136
+ out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
137
+ }.join("") if @out_space
138
+
133
139
  # Convert output
134
140
  ret = ""
135
141
  tokens.each{ |token|
136
- case token
142
+ case token
137
143
  when ""
138
144
  when "*UNKNOWN"
139
145
  ret += UNKNOWN_CHAR_OUTPUT
@@ -142,13 +148,13 @@ module Glaemscribe
142
148
  when "*LF"
143
149
  ret += "\n"
144
150
  else
145
- c = out_charset[token]
151
+ c = out_charset[token]
146
152
  ret += (c.nil?)?(UNKNOWN_CHAR_OUTPUT):c.str
147
- end
153
+ end
148
154
  }
149
155
  ret
150
- end
151
- end
152
-
156
+ end
157
+ end
158
+
153
159
  end
154
160
  end
@@ -73,7 +73,7 @@ module Glaemscribe
73
73
  }
74
74
  end
75
75
 
76
- def apply(l)
76
+ def apply(l, debug_context)
77
77
  ret = []
78
78
  current_group = nil
79
79
  accumulated_word = ""
@@ -81,14 +81,14 @@ module Glaemscribe
81
81
  l.split("").each{ |c|
82
82
  case c
83
83
  when " ", "\t"
84
- ret += transcribe_word(accumulated_word)
84
+ ret += transcribe_word(accumulated_word, debug_context)
85
85
  ret += ["*SPACE"]
86
86
 
87
87
  accumulated_word = ""
88
88
  when "\r"
89
89
  # Ignore
90
90
  when "\n"
91
- ret += transcribe_word(accumulated_word)
91
+ ret += transcribe_word(accumulated_word, debug_context)
92
92
  ret += ["*LF"]
93
93
 
94
94
  accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
97
97
  if c_group == current_group
98
98
  accumulated_word += c
99
99
  else
100
- ret += transcribe_word(accumulated_word)
100
+ ret += transcribe_word(accumulated_word, debug_context)
101
101
  current_group = c_group
102
102
  accumulated_word = c
103
103
  end
104
104
  end
105
105
  }
106
106
  # Just in case
107
- ret += transcribe_word(accumulated_word)
107
+ ret += transcribe_word(accumulated_word, debug_context)
108
108
  ret
109
109
  end
110
110
 
111
- def transcribe_word(word)
111
+ def transcribe_word(word, debug_context)
112
112
  res = []
113
113
  word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
114
114
  while word.length != 0
115
- r, len = @transcription_tree.transcribe(word)
116
- word = word[len..-1]
117
- res += r
115
+ tokens, len = @transcription_tree.transcribe(word)
116
+ word = word[len..-1]
117
+ eaten = word[0..len-1]
118
+ res += tokens
119
+
120
+ debug_context.processor_pathes << [eaten, tokens, tokens]
118
121
  end
119
122
  # Return token list
120
123
  res
data/lib/glaemscribe.rb CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
67
67
  require API_PATH + "pre_processor/substitute.rb"
68
68
  require API_PATH + "pre_processor/rxsubstitute.rb"
69
69
  require API_PATH + "pre_processor/up_down_tehta_split.rb"
70
+
71
+ require API_PATH + "post_processor/outspace.rb"
70
72
  require API_PATH + "post_processor/reverse.rb"
71
73
  require API_PATH + "post_processor/resolve_virtuals.rb"
72
74