glaemscribe 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -27,7 +27,8 @@ module Glaemscribe
27
27
  attr_reader :glaeml_element
28
28
  attr_reader :finalized_glaeml_element
29
29
 
30
- def initialize(glaeml_element)
30
+ def initialize(mode, glaeml_element)
31
+ @mode = mode
31
32
  @glaeml_element = glaeml_element
32
33
  end
33
34
 
@@ -121,15 +122,17 @@ module Glaemscribe
121
122
  attr_accessor :out_space
122
123
 
123
124
  def apply(tokens, out_charset)
124
-
125
- out_space_str = " "
126
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
127
125
 
128
126
  # Apply filters
129
127
  @operators.each{ |operator|
130
128
  tokens = operator.apply(tokens,out_charset)
131
129
  }
132
-
130
+
131
+ out_space_str = " "
132
+ out_space_str = @out_space.map{ |token|
133
+ out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
134
+ }.join("") if @out_space
135
+
133
136
  # Convert output
134
137
  ret = ""
135
138
  tokens.each{ |token|
@@ -73,7 +73,7 @@ module Glaemscribe
73
73
  }
74
74
  end
75
75
 
76
- def apply(l)
76
+ def apply(l, debug_context)
77
77
  ret = []
78
78
  current_group = nil
79
79
  accumulated_word = ""
@@ -81,14 +81,14 @@ module Glaemscribe
81
81
  l.split("").each{ |c|
82
82
  case c
83
83
  when " ", "\t"
84
- ret += transcribe_word(accumulated_word)
84
+ ret += transcribe_word(accumulated_word, debug_context)
85
85
  ret += ["*SPACE"]
86
86
 
87
87
  accumulated_word = ""
88
88
  when "\r"
89
89
  # Ignore
90
90
  when "\n"
91
- ret += transcribe_word(accumulated_word)
91
+ ret += transcribe_word(accumulated_word, debug_context)
92
92
  ret += ["*LF"]
93
93
 
94
94
  accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
97
97
  if c_group == current_group
98
98
  accumulated_word += c
99
99
  else
100
- ret += transcribe_word(accumulated_word)
100
+ ret += transcribe_word(accumulated_word, debug_context)
101
101
  current_group = c_group
102
102
  accumulated_word = c
103
103
  end
104
104
  end
105
105
  }
106
106
  # Just in case
107
- ret += transcribe_word(accumulated_word)
107
+ ret += transcribe_word(accumulated_word, debug_context)
108
108
  ret
109
109
  end
110
110
 
111
- def transcribe_word(word)
111
+ def transcribe_word(word, debug_context)
112
112
  res = []
113
113
  word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
114
114
  while word.length != 0
115
- r, len = @transcription_tree.transcribe(word)
116
- word = word[len..-1]
117
- res += r
115
+ tokens, len = @transcription_tree.transcribe(word)
116
+ word = word[len..-1]
117
+ eaten = word[0..len-1]
118
+ res += tokens
119
+
120
+ debug_context.processor_pathes << [eaten, tokens, tokens]
118
121
  end
119
122
  # Return token list
120
123
  res
data/lib/glaemscribe.rb CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
67
67
  require API_PATH + "pre_processor/substitute.rb"
68
68
  require API_PATH + "pre_processor/rxsubstitute.rb"
69
69
  require API_PATH + "pre_processor/up_down_tehta_split.rb"
70
+
71
+ require API_PATH + "post_processor/outspace.rb"
70
72
  require API_PATH + "post_processor/reverse.rb"
71
73
  require API_PATH + "post_processor/resolve_virtuals.rb"
72
74