glaemscribe 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset.rb +124 -57
  24. data/lib/api/charset_parser.rb +39 -26
  25. data/lib/api/mode.rb +35 -10
  26. data/lib/api/mode_parser.rb +21 -12
  27. data/lib/api/post_processor/outspace.rb +44 -0
  28. data/lib/api/post_processor/resolve_virtuals.rb +41 -19
  29. data/lib/api/rule_group.rb +1 -1
  30. data/lib/api/transcription_pre_post_processor.rb +51 -45
  31. data/lib/api/transcription_processor.rb +12 -9
  32. data/lib/glaemscribe.rb +2 -0
  33. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  34. data/lib_espeak/glaemscribe_tts.js +363 -223
  35. metadata +12 -6
data/lib/api/post_processor/outspace.rb ADDED
@@ -0,0 +1,44 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ # A post processor operator to replace the out_space on the fly.
24
+ # This has the same effect as the \outspace parameter
25
+ # But can be included in the postprocessor and benefit from the if/then logic
26
+
27
+ module Glaemscribe
28
+ module API
29
+
30
+ class OutspacePostProcessorOperator < PostProcessorOperator
31
+ def initialize(mode, glaeml_element)
32
+ super(mode, glaeml_element)
33
+ @out_space = @mode.post_processor.out_space = glaeml_element.args[0].split.reject{|token| token.empty? }
34
+ end
35
+
36
+ def apply(tokens, charset)
37
+ @mode.post_processor.out_space = @out_space
38
+ tokens
39
+ end
40
+ end
41
+
42
+ ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
43
+ end
44
+ end
data/lib/api/post_processor/resolve_virtuals.rb CHANGED
@@ -1,22 +1,22 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
@@ -24,25 +24,25 @@ module Glaemscribe
24
24
  module API
25
25
 
26
26
  class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
27
-
27
+
28
28
  def finalize(trans_options)
29
29
  super(trans_options)
30
30
  @last_triggers = {} # Allocate the lookup here to optimize
31
31
  end
32
-
32
+
33
33
  def reset_trigger_states(charset)
34
34
  # For each virtual char in charset, maintain a state.
35
35
  charset.virtual_chars.each{ |vc|
36
36
  @last_triggers[vc] = nil # Clear the state
37
37
  }
38
38
  end
39
-
39
+
40
40
  def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
41
41
  if token == '*SPACE' || token =='*LF'
42
42
  reset_trigger_states(charset)
43
43
  return
44
44
  end
45
-
45
+
46
46
  # Check if token is a virtual char
47
47
  c = charset[token]
48
48
  return if c.nil? # May happen for empty tokens
@@ -54,14 +54,14 @@ module Glaemscribe
54
54
  token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
55
55
  end
56
56
  end
57
-
57
+
58
58
  # Update states of virtual classes
59
59
  charset.virtual_chars.each{|vc|
60
60
  rc = vc[token]
61
- @last_triggers[vc] = rc if rc != nil
61
+ @last_triggers[vc] = rc if rc != nil
62
62
  }
63
63
  end
64
-
64
+
65
65
  def apply_sequences(charset,tokens)
66
66
  ret = []
67
67
  tokens.each { |token|
@@ -74,21 +74,43 @@ module Glaemscribe
74
74
  }
75
75
  ret
76
76
  end
77
-
77
+
78
+ def apply_swaps(charset, tokens)
79
+
80
+ idx = 0
81
+ while idx < tokens.length - 1
82
+ tok = tokens[idx]
83
+ tgt = tokens[idx+1]
84
+
85
+ trig = charset.swap_for_trigger(tok)
86
+
87
+ if trig && trig.has_target?(tgt)
88
+ tokens[idx+1] = tok
89
+ tokens[idx] = tgt
90
+ end
91
+
92
+ idx += 1
93
+ end
94
+
95
+ tokens
96
+ end
97
+
78
98
  def apply(tokens,charset)
79
99
  # Apply sequence chars
80
100
  tokens = apply_sequences(charset,tokens)
81
-
101
+
102
+ tokens = apply_swaps(charset, tokens)
103
+
82
104
  # Clone the tokens so that we can perform ligatures AND diacritics without interferences
83
105
  new_tokens = tokens.clone
84
-
106
+
85
107
  # Handle l to r virtuals (diacritics ?)
86
- reset_trigger_states(charset)
108
+ reset_trigger_states(charset)
87
109
  tokens.each_with_index{ |token,idx|
88
110
  apply_loop(charset,tokens,new_tokens,false,token,idx)
89
111
  }
90
112
  # Handle r to l virtuals (ligatures ?)
91
- reset_trigger_states(charset)
113
+ reset_trigger_states(charset)
92
114
  tokens.reverse_each.with_index{ |token,idx|
93
115
  apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
94
116
  }
@@ -96,7 +118,7 @@ module Glaemscribe
96
118
  end
97
119
  end
98
120
 
99
- ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
121
+ ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
100
122
 
101
123
  end
102
124
  end
data/lib/api/rule_group.rb CHANGED
@@ -138,7 +138,7 @@ module Glaemscribe
138
138
  var_value = apply_vars(term.line, var_value_ex, true)
139
139
 
140
140
  if !var_value
141
- @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{var_name}} could not be declared.")
141
+ @mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
142
142
  end
143
143
  end
144
144
 
data/lib/api/transcription_pre_post_processor.rb CHANGED
@@ -1,45 +1,46 @@
1
1
  # encoding: UTF-8
2
2
  #
3
3
  # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
- # the transcription of texts between writing systems, and more
5
- # specifically dedicated to the transcription of J.R.R. Tolkien's
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
6
  # invented languages to some of his devised writing systems.
7
- #
7
+ #
8
8
  # Copyright (C) 2015 Benjamin Babut (Talagan).
9
- #
9
+ #
10
10
  # This program is free software: you can redistribute it and/or modify
11
11
  # it under the terms of the GNU Affero General Public License as published by
12
12
  # the Free Software Foundation, either version 3 of the License, or
13
13
  # any later version.
14
- #
14
+ #
15
15
  # This program is distributed in the hope that it will be useful,
16
16
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
17
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
18
  # GNU Affero General Public License for more details.
19
- #
19
+ #
20
20
  # You should have received a copy of the GNU Affero General Public License
21
21
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
23
23
  module Glaemscribe
24
24
  module API
25
-
25
+
26
26
  class PrePostProcessorOperator
27
27
  attr_reader :glaeml_element
28
28
  attr_reader :finalized_glaeml_element
29
-
30
- def initialize(glaeml_element)
29
+
30
+ def initialize(mode, glaeml_element)
31
+ @mode = mode
31
32
  @glaeml_element = glaeml_element
32
33
  end
33
-
34
+
34
35
  def eval_arg(arg, trans_options)
35
36
  return nil if arg.nil?
36
37
  if arg =~ /^\\eval\s/
37
38
  to_eval = $'
38
39
  return Eval::Parser.new().parse(to_eval, trans_options)
39
40
  end
40
- return arg
41
+ return arg
41
42
  end
42
-
43
+
43
44
  def finalize_glaeml_element(ge, trans_options)
44
45
  ge.args.map! { |arg| eval_arg(arg, trans_options) }
45
46
  ge.children.each{ |child|
@@ -47,37 +48,37 @@ module Glaemscribe
47
48
  }
48
49
  ge
49
50
  end
50
-
51
+
51
52
  def finalize(trans_options)
52
53
  @finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
53
54
  end
54
-
55
+
55
56
  def apply
56
57
  raise "Pure virtual method, should be overloaded."
57
58
  end
58
59
  end
59
-
60
+
60
61
  class TranscriptionPrePostProcessor
61
62
  attr_reader :root_code_block
62
-
63
+
63
64
  attr_reader :operators
64
-
65
+
65
66
  def initialize(mode)
66
67
  @mode = mode
67
- @root_code_block = IfTree::CodeBlock.new
68
+ @root_code_block = IfTree::CodeBlock.new
68
69
  end
69
-
70
+
70
71
  def descend_if_tree(code_block, trans_options)
71
- code_block.terms.each{ |term|
72
+ code_block.terms.each{ |term|
72
73
  if(term.is_pre_post_processor_operators?)
73
74
  term.operators.each{ |operator|
74
75
  @operators << operator
75
- }
76
+ }
76
77
  else
77
78
  term.if_conds.each{ |if_cond|
78
-
79
+
79
80
  if_eval = Eval::Parser.new()
80
-
81
+
81
82
  if(if_eval.parse(if_cond.expression, trans_options) == true)
82
83
  descend_if_tree(if_cond.child_code_block, trans_options)
83
84
  break
@@ -86,7 +87,7 @@ module Glaemscribe
86
87
  end
87
88
  }
88
89
  end
89
-
90
+
90
91
  def finalize(trans_options)
91
92
  @operators = []
92
93
  # Select operators depending on conditions
@@ -98,42 +99,47 @@ module Glaemscribe
98
99
  end
99
100
 
100
101
  end
101
-
102
- class PreProcessorOperator < PrePostProcessorOperator
102
+
103
+ class PreProcessorOperator < PrePostProcessorOperator
103
104
  end
104
-
105
+
105
106
  class PostProcessorOperator < PrePostProcessorOperator
106
107
  end
107
-
108
- class TranscriptionPreProcessor < TranscriptionPrePostProcessor
108
+
109
+ class TranscriptionPreProcessor < TranscriptionPrePostProcessor
109
110
  # Apply all preprocessor rules consecutively
110
111
  def apply(l)
111
112
  ret = l
112
113
  @operators.each{ |operator|
113
114
  ret = operator.apply(ret)
114
- }
115
+ }
115
116
  ret
116
117
  end
117
118
  end
118
-
119
+
119
120
  class TranscriptionPostProcessor < TranscriptionPrePostProcessor
120
-
121
+
121
122
  attr_accessor :out_space
122
-
123
+
123
124
  def apply(tokens, out_charset)
124
-
125
- out_space_str = " "
126
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
127
-
125
+
126
+ # Cleanup the output of the chain by removing empty tokens
127
+ tokens.select!{ |tok| tok != "" }
128
+
128
129
  # Apply filters
129
130
  @operators.each{ |operator|
130
131
  tokens = operator.apply(tokens,out_charset)
131
- }
132
-
132
+ }
133
+
134
+ out_space_str = " "
135
+ out_space_str = @out_space.map{ |token|
136
+ out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
137
+ }.join("") if @out_space
138
+
133
139
  # Convert output
134
140
  ret = ""
135
141
  tokens.each{ |token|
136
- case token
142
+ case token
137
143
  when ""
138
144
  when "*UNKNOWN"
139
145
  ret += UNKNOWN_CHAR_OUTPUT
@@ -142,13 +148,13 @@ module Glaemscribe
142
148
  when "*LF"
143
149
  ret += "\n"
144
150
  else
145
- c = out_charset[token]
151
+ c = out_charset[token]
146
152
  ret += (c.nil?)?(UNKNOWN_CHAR_OUTPUT):c.str
147
- end
153
+ end
148
154
  }
149
155
  ret
150
- end
151
- end
152
-
156
+ end
157
+ end
158
+
153
159
  end
154
160
  end
data/lib/api/transcription_processor.rb CHANGED
@@ -73,7 +73,7 @@ module Glaemscribe
73
73
  }
74
74
  end
75
75
 
76
- def apply(l)
76
+ def apply(l, debug_context)
77
77
  ret = []
78
78
  current_group = nil
79
79
  accumulated_word = ""
@@ -81,14 +81,14 @@ module Glaemscribe
81
81
  l.split("").each{ |c|
82
82
  case c
83
83
  when " ", "\t"
84
- ret += transcribe_word(accumulated_word)
84
+ ret += transcribe_word(accumulated_word, debug_context)
85
85
  ret += ["*SPACE"]
86
86
 
87
87
  accumulated_word = ""
88
88
  when "\r"
89
89
  # Ignore
90
90
  when "\n"
91
- ret += transcribe_word(accumulated_word)
91
+ ret += transcribe_word(accumulated_word, debug_context)
92
92
  ret += ["*LF"]
93
93
 
94
94
  accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
97
97
  if c_group == current_group
98
98
  accumulated_word += c
99
99
  else
100
- ret += transcribe_word(accumulated_word)
100
+ ret += transcribe_word(accumulated_word, debug_context)
101
101
  current_group = c_group
102
102
  accumulated_word = c
103
103
  end
104
104
  end
105
105
  }
106
106
  # Just in case
107
- ret += transcribe_word(accumulated_word)
107
+ ret += transcribe_word(accumulated_word, debug_context)
108
108
  ret
109
109
  end
110
110
 
111
- def transcribe_word(word)
111
+ def transcribe_word(word, debug_context)
112
112
  res = []
113
113
  word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
114
114
  while word.length != 0
115
- r, len = @transcription_tree.transcribe(word)
116
- word = word[len..-1]
117
- res += r
115
+ tokens, len = @transcription_tree.transcribe(word)
116
+ word = word[len..-1]
117
+ eaten = word[0..len-1]
118
+ res += tokens
119
+
120
+ debug_context.processor_pathes << [eaten, tokens, tokens]
118
121
  end
119
122
  # Return token list
120
123
  res
data/lib/glaemscribe.rb CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
67
67
  require API_PATH + "pre_processor/substitute.rb"
68
68
  require API_PATH + "pre_processor/rxsubstitute.rb"
69
69
  require API_PATH + "pre_processor/up_down_tehta_split.rb"
70
+
71
+ require API_PATH + "post_processor/outspace.rb"
70
72
  require API_PATH + "post_processor/reverse.rb"
71
73
  require API_PATH + "post_processor/resolve_virtuals.rb"
72
74