glaemscribe 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/glaemscribe +2 -2
- data/glaemresources/charsets/cirth_ds.cst +514 -179
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
- data/glaemresources/charsets/tengwar_freemono.cst +1 -1
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
- data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
- data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +9 -4
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/lib/api/charset_parser.rb +7 -1
- data/lib/api/mode.rb +35 -10
- data/lib/api/mode_parser.rb +21 -12
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/rule_group.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +12 -9
- data/lib/glaemscribe.rb +2 -0
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
- data/lib_espeak/glaemscribe_tts.js +363 -223
- metadata +12 -6
@@ -27,7 +27,8 @@ module Glaemscribe
|
|
27
27
|
attr_reader :glaeml_element
|
28
28
|
attr_reader :finalized_glaeml_element
|
29
29
|
|
30
|
-
def initialize(glaeml_element)
|
30
|
+
def initialize(mode, glaeml_element)
|
31
|
+
@mode = mode
|
31
32
|
@glaeml_element = glaeml_element
|
32
33
|
end
|
33
34
|
|
@@ -121,15 +122,17 @@ module Glaemscribe
|
|
121
122
|
attr_accessor :out_space
|
122
123
|
|
123
124
|
def apply(tokens, out_charset)
|
124
|
-
|
125
|
-
out_space_str = " "
|
126
|
-
out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
|
127
125
|
|
128
126
|
# Apply filters
|
129
127
|
@operators.each{ |operator|
|
130
128
|
tokens = operator.apply(tokens,out_charset)
|
131
129
|
}
|
132
|
-
|
130
|
+
|
131
|
+
out_space_str = " "
|
132
|
+
out_space_str = @out_space.map{ |token|
|
133
|
+
out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
|
134
|
+
}.join("") if @out_space
|
135
|
+
|
133
136
|
# Convert output
|
134
137
|
ret = ""
|
135
138
|
tokens.each{ |token|
|
@@ -73,7 +73,7 @@ module Glaemscribe
|
|
73
73
|
}
|
74
74
|
end
|
75
75
|
|
76
|
-
def apply(l)
|
76
|
+
def apply(l, debug_context)
|
77
77
|
ret = []
|
78
78
|
current_group = nil
|
79
79
|
accumulated_word = ""
|
@@ -81,14 +81,14 @@ module Glaemscribe
|
|
81
81
|
l.split("").each{ |c|
|
82
82
|
case c
|
83
83
|
when " ", "\t"
|
84
|
-
ret += transcribe_word(accumulated_word)
|
84
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
85
85
|
ret += ["*SPACE"]
|
86
86
|
|
87
87
|
accumulated_word = ""
|
88
88
|
when "\r"
|
89
89
|
# Ignore
|
90
90
|
when "\n"
|
91
|
-
ret += transcribe_word(accumulated_word)
|
91
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
92
92
|
ret += ["*LF"]
|
93
93
|
|
94
94
|
accumulated_word = ""
|
@@ -97,24 +97,27 @@ module Glaemscribe
|
|
97
97
|
if c_group == current_group
|
98
98
|
accumulated_word += c
|
99
99
|
else
|
100
|
-
ret += transcribe_word(accumulated_word)
|
100
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
101
101
|
current_group = c_group
|
102
102
|
accumulated_word = c
|
103
103
|
end
|
104
104
|
end
|
105
105
|
}
|
106
106
|
# Just in case
|
107
|
-
ret += transcribe_word(accumulated_word)
|
107
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
108
108
|
ret
|
109
109
|
end
|
110
110
|
|
111
|
-
def transcribe_word(word)
|
111
|
+
def transcribe_word(word, debug_context)
|
112
112
|
res = []
|
113
113
|
word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
|
114
114
|
while word.length != 0
|
115
|
-
|
116
|
-
word
|
117
|
-
|
115
|
+
tokens, len = @transcription_tree.transcribe(word)
|
116
|
+
word = word[len..-1]
|
117
|
+
eaten = word[0..len-1]
|
118
|
+
res += tokens
|
119
|
+
|
120
|
+
debug_context.processor_pathes << [eaten, tokens, tokens]
|
118
121
|
end
|
119
122
|
# Return token list
|
120
123
|
res
|
data/lib/glaemscribe.rb
CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
|
|
67
67
|
require API_PATH + "pre_processor/substitute.rb"
|
68
68
|
require API_PATH + "pre_processor/rxsubstitute.rb"
|
69
69
|
require API_PATH + "pre_processor/up_down_tehta_split.rb"
|
70
|
+
|
71
|
+
require API_PATH + "post_processor/outspace.rb"
|
70
72
|
require API_PATH + "post_processor/reverse.rb"
|
71
73
|
require API_PATH + "post_processor/resolve_virtuals.rb"
|
72
74
|
|