glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
data/lib/api/transcription_pre_post_processor.rb CHANGED
@@ -27,7 +27,8 @@ module Glaemscribe
27
27
  attr_reader :glaeml_element
28
28
  attr_reader :finalized_glaeml_element
29
29
 
30
- def initialize(glaeml_element)
30
+ def initialize(mode, glaeml_element)
31
+ @mode = mode
31
32
  @glaeml_element = glaeml_element
32
33
  end
33
34
 
@@ -121,15 +122,17 @@ module Glaemscribe
121
122
  attr_accessor :out_space
122
123
 
123
124
  def apply(tokens, out_charset)
124
-
125
- out_space_str = " "
126
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
127
125
 
128
126
  # Apply filters
129
127
  @operators.each{ |operator|
130
128
  tokens = operator.apply(tokens,out_charset)
131
129
  }
132
-
130
+
131
+ out_space_str = " "
132
+ out_space_str = @out_space.map{ |token|
133
+ out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
134
+ }.join("") if @out_space
135
+
133
136
  # Convert output
134
137
  ret = ""
135
138
  tokens.each{ |token|
data/lib/api/transcription_processor.rb CHANGED
@@ -73,7 +73,7 @@ module Glaemscribe
73
73
  }
74
74
  end
75
75
 
76
- def apply(l)
76
+ def apply(l, debug_context)
77
77
  ret = []
78
78
  current_group = nil
79
79
  accumulated_word = ""
@@ -81,14 +81,14 @@ module Glaemscribe
81
81
  l.split("").each{ |c|
82
82
  case c
83
83
  when " ", "\t"
84
- ret += transcribe_word(accumulated_word)
84
+ ret += transcribe_word(accumulated_word, debug_context)
85
85
  ret += ["*SPACE"]
86
86
 
87
87
  accumulated_word = ""
88
88
  when "\r"
89
89
  # Ignore
90
90
  when "\n"
91
- ret += transcribe_word(accumulated_word)
91
+ ret += transcribe_word(accumulated_word, debug_context)
92
92
  ret += ["*LF"]
93
93
 
94
94
  accumulated_word = ""
@@ -97,24 +97,27 @@ module Glaemscribe
97
97
  if c_group == current_group
98
98
  accumulated_word += c
99
99
  else
100
- ret += transcribe_word(accumulated_word)
100
+ ret += transcribe_word(accumulated_word, debug_context)
101
101
  current_group = c_group
102
102
  accumulated_word = c
103
103
  end
104
104
  end
105
105
  }
106
106
  # Just in case
107
- ret += transcribe_word(accumulated_word)
107
+ ret += transcribe_word(accumulated_word, debug_context)
108
108
  ret
109
109
  end
110
110
 
111
- def transcribe_word(word)
111
+ def transcribe_word(word, debug_context)
112
112
  res = []
113
113
  word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
114
114
  while word.length != 0
115
- r, len = @transcription_tree.transcribe(word)
116
- word = word[len..-1]
117
- res += r
115
+ tokens, len = @transcription_tree.transcribe(word)
116
+ word = word[len..-1]
117
+ eaten = word[0..len-1]
118
+ res += tokens
119
+
120
+ debug_context.processor_pathes << [eaten, tokens, tokens]
118
121
  end
119
122
  # Return token list
120
123
  res
data/lib/glaemscribe.rb CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
67
67
  require API_PATH + "pre_processor/substitute.rb"
68
68
  require API_PATH + "pre_processor/rxsubstitute.rb"
69
69
  require API_PATH + "pre_processor/up_down_tehta_split.rb"
70
+
71
+ require API_PATH + "post_processor/outspace.rb"
70
72
  require API_PATH + "post_processor/reverse.rb"
71
73
  require API_PATH + "post_processor/resolve_virtuals.rb"
72
74