glaemscribe 1.1.14 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/glaemscribe +21 -17
- data/glaemresources/charsets/cirth_ds.cst +540 -0
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +776 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +14 -1
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +66 -19
- data/lib/api/mode_parser.rb +117 -14
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +15 -12
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +36 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
- data/lib_espeak/glaemscribe_tts.js +505 -0
- metadata +76 -24
data/lib/api/charset.rb
CHANGED
@@ -30,11 +30,11 @@ module Glaemscribe
|
|
30
30
|
attr_reader :virtual_chars
|
31
31
|
|
32
32
|
class Char
|
33
|
-
attr_accessor :line
|
34
|
-
attr_accessor :code
|
35
|
-
attr_accessor :names
|
36
|
-
attr_accessor :str
|
37
|
-
attr_accessor :charset
|
33
|
+
attr_accessor :line # Line num in the sourcecode
|
34
|
+
attr_accessor :code # Position in unicode
|
35
|
+
attr_accessor :names # Names
|
36
|
+
attr_accessor :str # How does this char resolve as a string
|
37
|
+
attr_accessor :charset # Pointer to parent charset
|
38
38
|
|
39
39
|
def initialize
|
40
40
|
@names = {}
|
@@ -43,9 +43,13 @@ module Glaemscribe
|
|
43
43
|
def virtual?
|
44
44
|
false
|
45
45
|
end
|
46
|
+
|
47
|
+
def sequence?
|
48
|
+
false
|
49
|
+
end
|
46
50
|
end
|
47
51
|
|
48
|
-
class VirtualChar
|
52
|
+
class VirtualChar # Could have had inheritance here ...
|
49
53
|
attr_accessor :line
|
50
54
|
attr_accessor :names
|
51
55
|
attr_accessor :classes
|
@@ -121,6 +125,45 @@ module Glaemscribe
|
|
121
125
|
def virtual?
|
122
126
|
true
|
123
127
|
end
|
128
|
+
|
129
|
+
def sequence?
|
130
|
+
false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class SequenceChar
|
135
|
+
attr_accessor :line # Line of code
|
136
|
+
attr_accessor :names # Names
|
137
|
+
attr_accessor :sequence # The sequence of chars
|
138
|
+
attr_accessor :charset # Pointer to parent charset
|
139
|
+
|
140
|
+
def virtual?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def sequence?
|
145
|
+
true
|
146
|
+
end
|
147
|
+
|
148
|
+
def str
|
149
|
+
# A sequence char should never arrive unreplaced
|
150
|
+
VIRTUAL_CHAR_OUTPUT
|
151
|
+
end
|
152
|
+
|
153
|
+
def finalize
|
154
|
+
if @sequence.count == 0
|
155
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
156
|
+
end
|
157
|
+
|
158
|
+
@sequence.each{ |symbol|
|
159
|
+
# Check that the sequence is correct
|
160
|
+
found = @charset[symbol]
|
161
|
+
if !found
|
162
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
|
163
|
+
end
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
124
167
|
end
|
125
168
|
|
126
169
|
def initialize(name)
|
@@ -156,10 +199,21 @@ module Glaemscribe
|
|
156
199
|
@chars << c
|
157
200
|
end
|
158
201
|
|
202
|
+
def add_sequence_char(line, names, seq)
|
203
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
204
|
+
|
205
|
+
c = SequenceChar.new
|
206
|
+
c.line = line
|
207
|
+
c.names = names
|
208
|
+
c.sequence = seq.split.reject{|token| token.empty? }
|
209
|
+
c.charset = self
|
210
|
+
@chars << c
|
211
|
+
end
|
212
|
+
|
159
213
|
def finalize
|
160
214
|
@errors = []
|
161
215
|
@lookup_table = {}
|
162
|
-
@virtual_chars = []
|
216
|
+
@virtual_chars = [] # A convenient filtered array
|
163
217
|
|
164
218
|
@chars.each { |c|
|
165
219
|
c.names.each { |cname|
|
@@ -179,6 +233,12 @@ module Glaemscribe
|
|
179
233
|
end
|
180
234
|
}
|
181
235
|
|
236
|
+
@chars.each{|c|
|
237
|
+
if c.class == SequenceChar
|
238
|
+
c.finalize
|
239
|
+
end
|
240
|
+
}
|
241
|
+
|
182
242
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
183
243
|
end
|
184
244
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -47,6 +47,13 @@ module Glaemscribe
|
|
47
47
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
48
48
|
@charset.add_char(char_element.line,code,names)
|
49
49
|
}
|
50
|
+
|
51
|
+
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
52
|
+
names = seq_elemnt.args
|
53
|
+
child_node = seq_elemnt.children.first
|
54
|
+
seq = (child_node && child_node.text?)?(child_node.args.first):("")
|
55
|
+
@charset.add_sequence_char(seq_elemnt.line,names,seq)
|
56
|
+
}
|
50
57
|
|
51
58
|
doc.root_node.gpath("virtual").each { |virtual_element|
|
52
59
|
names = virtual_element.args
|
@@ -57,7 +64,13 @@ module Glaemscribe
|
|
57
64
|
virtual_element.gpath("class").each { |class_element|
|
58
65
|
vc = Charset::VirtualChar::VirtualClass.new
|
59
66
|
vc.target = class_element.args[0]
|
60
|
-
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
67
|
+
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
68
|
+
|
69
|
+
# Allow triggers to be defined inside the body of the class element
|
70
|
+
text_lines = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
|
71
|
+
inner_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
|
72
|
+
vc.triggers += inner_triggers
|
73
|
+
|
61
74
|
classes << vc
|
62
75
|
}
|
63
76
|
virtual_element.gpath("reversed").each { |reversed_element|
|
data/lib/api/constants.rb
CHANGED
@@ -23,11 +23,10 @@
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
25
|
WORD_BREAKER = "|"
|
26
|
-
WORD_BOUNDARY = "_"
|
27
|
-
|
28
|
-
SPECIAL_CHAR_UNDERSCORE = '➊'
|
29
|
-
SPECIAL_CHAR_NBSP = '➋'
|
30
26
|
|
27
|
+
WORD_BOUNDARY_LANG = "_"
|
28
|
+
WORD_BOUNDARY_TREE = "\u0000"
|
29
|
+
|
31
30
|
UNKNOWN_CHAR_OUTPUT = "☠"
|
32
31
|
VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
|
33
32
|
end
|
data/lib/api/fragment.rb
CHANGED
@@ -41,7 +41,7 @@ module Glaemscribe
|
|
41
41
|
EQUIVALENCE_RX_OUT = /(\(.*?\))/
|
42
42
|
EQUIVALENCE_RX_IN = /\((.*?)\)/
|
43
43
|
|
44
|
-
# Should pass a fragment expression, e.g. : "h(a
|
44
|
+
# Should pass a fragment expression, e.g. : "h(a,ä)(i,ï)"
|
45
45
|
def initialize(sheaf, expression)
|
46
46
|
@sheaf = sheaf
|
47
47
|
@mode = sheaf.mode
|
@@ -49,16 +49,16 @@ module Glaemscribe
|
|
49
49
|
@expression = expression
|
50
50
|
|
51
51
|
# Split the fragment, turn it into an array of arrays, e.g. [[h],[a,ä],[i,ï]]
|
52
|
-
equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }
|
52
|
+
equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }.reject{ |eq| eq == '' }
|
53
53
|
equivalences = equivalences.map{ |eq|
|
54
54
|
eq =~ EQUIVALENCE_RX_IN
|
55
55
|
if $1
|
56
56
|
eq = $1.split(EQUIVALENCE_SEPARATOR,-1).map{ |elt|
|
57
57
|
elt = elt.strip
|
58
|
-
elt.split(/\s/)
|
59
|
-
}
|
58
|
+
elt.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }
|
59
|
+
}
|
60
60
|
else
|
61
|
-
eq = [eq.split(/\s/)] # This equivalence has only one possibility
|
61
|
+
eq = [eq.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }] # This equivalence has only one possibility
|
62
62
|
end
|
63
63
|
}
|
64
64
|
|
@@ -87,6 +87,7 @@ module Glaemscribe
|
|
87
87
|
# Calculate all combinations for this fragment (productize the array of arrays)
|
88
88
|
res = equivalences[0]
|
89
89
|
|
90
|
+
# ((eq0 x eq1) x eq2) x eq3 ) ... )))))
|
90
91
|
(equivalences.length-1).times { |i|
|
91
92
|
prod = res.product(equivalences[i+1]).map{ |x,y| x+y}
|
92
93
|
res = prod
|
@@ -95,6 +96,26 @@ module Glaemscribe
|
|
95
96
|
@combinations = res
|
96
97
|
end
|
97
98
|
|
99
|
+
def finalize_fragment_leaf(leaf)
|
100
|
+
if src?
|
101
|
+
|
102
|
+
# Replace {UNI_XXXX} by its value to allow any unicode char to be found in the transcription tree
|
103
|
+
leaf = leaf.gsub(RuleGroup::UNICODE_VAR_NAME_REGEXP_OUT) { |cap_var|
|
104
|
+
unival = $1
|
105
|
+
new_char = [unival.hex].pack("U")
|
106
|
+
new_char = "\u0001" if new_char == '_'
|
107
|
+
new_char
|
108
|
+
}
|
109
|
+
|
110
|
+
# Replace '_' (word boundary) by '\u0000' to allow
|
111
|
+
# the real underscore to be used in the transcription tree
|
112
|
+
# (Do it after replacing the uni_xxx vars because they have underscores inside)
|
113
|
+
leaf = leaf.gsub(WORD_BOUNDARY_LANG, WORD_BOUNDARY_TREE)
|
114
|
+
leaf = leaf.gsub("\u0001","_")
|
115
|
+
end
|
116
|
+
|
117
|
+
leaf
|
118
|
+
end
|
98
119
|
|
99
120
|
def p
|
100
121
|
ret = "---- " + @expression + "\n"
|
data/lib/api/if_tree.rb
CHANGED
@@ -24,14 +24,36 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
module IfTree
|
26
26
|
|
27
|
+
# A branching if condition
|
27
28
|
class IfCond
|
28
29
|
attr_accessor :line, :expression, :parent_if_term, :child_code_block
|
29
30
|
def initialize(line, parent_if_term, expression)
|
30
31
|
@parent_if_term = parent_if_term
|
31
32
|
@expression = expression
|
32
33
|
end
|
34
|
+
def offset
|
35
|
+
parent_if_term.offset + " "
|
36
|
+
end
|
37
|
+
def prefix
|
38
|
+
offset + "|-"
|
39
|
+
end
|
40
|
+
def inspect
|
41
|
+
"#{prefix} IF #{expression}\n" +
|
42
|
+
"#{child_code_block.inspect}"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# A line of code
|
47
|
+
class CodeLine
|
48
|
+
attr_accessor :expression, :line
|
49
|
+
def initialize(expression, line)
|
50
|
+
@expression = expression
|
51
|
+
@line = line
|
52
|
+
end
|
33
53
|
end
|
34
54
|
|
55
|
+
# A node (code lines / preprocessor operators / ... )
|
56
|
+
# A node may have children or not depending on their nature
|
35
57
|
class Term
|
36
58
|
attr_accessor :parent_code_block
|
37
59
|
def initialize(parent_code_block)
|
@@ -43,24 +65,30 @@ module Glaemscribe
|
|
43
65
|
def is_pre_post_processor_operators?
|
44
66
|
false
|
45
67
|
end
|
68
|
+
def is_macro_deploy?
|
69
|
+
false
|
70
|
+
end
|
71
|
+
def offset
|
72
|
+
parent_code_block.offset + " "
|
73
|
+
end
|
74
|
+
def prefix
|
75
|
+
offset + "|- "
|
76
|
+
end
|
46
77
|
end
|
47
78
|
|
79
|
+
# A ifterm may have multiple ifconds (if,elsif,elsif,...,else)
|
48
80
|
class IfTerm < Term
|
49
81
|
attr_accessor :if_conds
|
50
82
|
def initialize(parent_code_block)
|
51
83
|
super(parent_code_block)
|
52
84
|
@if_conds = []
|
53
85
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
attr_accessor :expression, :line
|
58
|
-
def initialize(expression, line)
|
59
|
-
@expression = expression
|
60
|
-
@line = line
|
86
|
+
def inspect
|
87
|
+
"#{prefix} CONDITIONAL BLOCK\n" +
|
88
|
+
@if_conds.map{ |c| c.inspect }.join("\n")
|
61
89
|
end
|
62
90
|
end
|
63
|
-
|
91
|
+
|
64
92
|
class PrePostProcessorOperatorsTerm < Term
|
65
93
|
attr_accessor :operators
|
66
94
|
def initialize(parent_code_block)
|
@@ -70,6 +98,9 @@ module Glaemscribe
|
|
70
98
|
def is_pre_post_processor_operators?
|
71
99
|
true
|
72
100
|
end
|
101
|
+
def inspect
|
102
|
+
"#{prefix} OPERATORS (#{@operators.count})"
|
103
|
+
end
|
73
104
|
end
|
74
105
|
|
75
106
|
class CodeLinesTerm < Term
|
@@ -81,6 +112,25 @@ module Glaemscribe
|
|
81
112
|
def is_code_lines?
|
82
113
|
true
|
83
114
|
end
|
115
|
+
def inspect
|
116
|
+
"#{prefix} CODE LINES (#{@code_lines.count})"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class MacroDeployTerm < Term
|
121
|
+
attr_accessor :macro, :line, :arg_value_expressions
|
122
|
+
def initialize(macro, line, parent_code_block, arg_value_expressions)
|
123
|
+
super(parent_code_block)
|
124
|
+
@line = line
|
125
|
+
@macro = macro
|
126
|
+
@arg_value_expressions = arg_value_expressions
|
127
|
+
end
|
128
|
+
def is_macro_deploy?
|
129
|
+
true
|
130
|
+
end
|
131
|
+
def inspect
|
132
|
+
"#{prefix} MACRO DEPLOY (#{macro.name})"
|
133
|
+
end
|
84
134
|
end
|
85
135
|
|
86
136
|
class CodeBlock
|
@@ -89,6 +139,18 @@ module Glaemscribe
|
|
89
139
|
@parent_if_cond = parent_if_cond
|
90
140
|
@terms = []
|
91
141
|
end
|
142
|
+
def offset
|
143
|
+
((parent_if_cond)?(parent_if_cond.offset):("")) + " "
|
144
|
+
end
|
145
|
+
def prefix
|
146
|
+
offset + "|- "
|
147
|
+
end
|
148
|
+
def inspect
|
149
|
+
ret = ""
|
150
|
+
ret += "|-ROOT\n" if !parent_if_cond
|
151
|
+
ret += "#{prefix} Code block\n" +
|
152
|
+
@terms.map{|t| t.inspect}.join("\n")
|
153
|
+
end
|
92
154
|
end
|
93
155
|
|
94
156
|
end
|
data/lib/api/macro.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
class Macro
|
26
|
+
attr_reader :name, :rule_group, :mode, :arg_names
|
27
|
+
|
28
|
+
attr_reader :root_code_block
|
29
|
+
|
30
|
+
def initialize(rule_group,name,arg_names)
|
31
|
+
@rule_group = rule_group
|
32
|
+
@mode = rule_group.mode
|
33
|
+
@name = name
|
34
|
+
@arg_names = arg_names
|
35
|
+
@root_code_block = IfTree::CodeBlock.new
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/api/mode.rb
CHANGED
@@ -22,6 +22,23 @@
|
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
+
|
26
|
+
class ModeDebugContext
|
27
|
+
attr_accessor :preprocessor_output,
|
28
|
+
:processor_pathes,
|
29
|
+
:processor_output,
|
30
|
+
:postprocessor_output,
|
31
|
+
:tts_output
|
32
|
+
|
33
|
+
def initialize
|
34
|
+
@preprocessor_output = ""
|
35
|
+
@processor_pathes = []
|
36
|
+
@processor_output = []
|
37
|
+
@postprocessor_output = ""
|
38
|
+
@tts_output = ""
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
25
42
|
class Mode
|
26
43
|
|
27
44
|
attr_accessor :errors
|
@@ -41,6 +58,9 @@ module Glaemscribe
|
|
41
58
|
|
42
59
|
attr_accessor :world, :invention
|
43
60
|
|
61
|
+
attr_accessor :has_tts
|
62
|
+
attr_reader :current_tts_voice
|
63
|
+
|
44
64
|
attr_reader :latest_option_values
|
45
65
|
|
46
66
|
def initialize(name)
|
@@ -50,6 +70,8 @@ module Glaemscribe
|
|
50
70
|
@supported_charsets = {}
|
51
71
|
@options = {}
|
52
72
|
@last_raw_options = nil
|
73
|
+
@has_tts = false
|
74
|
+
@current_tts_voice = nil
|
53
75
|
|
54
76
|
@pre_processor = TranscriptionPreProcessor.new(self)
|
55
77
|
@processor = TranscriptionProcessor.new(self)
|
@@ -95,7 +117,7 @@ module Glaemscribe
|
|
95
117
|
|
96
118
|
trans_options_converted = {}
|
97
119
|
|
98
|
-
# Do a conversion to values space
|
120
|
+
# Do a conversion from names to values space
|
99
121
|
trans_options.each{ |oname,valname|
|
100
122
|
trans_options_converted[oname] = @options[oname].value_for_value_name(valname)
|
101
123
|
}
|
@@ -117,7 +139,13 @@ module Glaemscribe
|
|
117
139
|
@processor.finalize(@latest_option_values)
|
118
140
|
|
119
141
|
raw_mode.finalize options if raw_mode
|
120
|
-
|
142
|
+
|
143
|
+
# Update the current espeak voice
|
144
|
+
if @has_tts
|
145
|
+
espeak_option = @options['espeak_voice'].value_name_for_value(@latest_option_values['espeak_voice'])
|
146
|
+
@current_tts_voice = TTS.option_name_to_voice(espeak_option)
|
147
|
+
end
|
148
|
+
|
121
149
|
self
|
122
150
|
end
|
123
151
|
|
@@ -128,16 +156,19 @@ module Glaemscribe
|
|
128
156
|
@raw_mode = loaded_raw_mode.deep_clone
|
129
157
|
end
|
130
158
|
|
131
|
-
def
|
132
|
-
l.
|
133
|
-
gsub("_",SPECIAL_CHAR_UNDERSCORE).
|
134
|
-
gsub("\u00a0",SPECIAL_CHAR_NBSP)
|
135
|
-
end
|
136
|
-
|
137
|
-
def strict_transcribe(content, charset = nil)
|
159
|
+
def strict_transcribe(content, charset, debug_context)
|
138
160
|
charset = default_charset if !charset
|
139
161
|
return false, "*** No charset usable for transcription. Failed!" if !charset
|
140
162
|
|
163
|
+
if has_tts
|
164
|
+
begin
|
165
|
+
content = TTS.ipa(content, @current_tts_voice, (raw_mode != nil) )['ipa']
|
166
|
+
debug_context.tts_output += content
|
167
|
+
rescue StandardError => e
|
168
|
+
return false, "TTS pre-transcription failed : #{e}."
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
141
172
|
# Parser works line by line
|
142
173
|
ret = content.lines.map{ |l|
|
143
174
|
restore_lf = false
|
@@ -145,10 +176,16 @@ module Glaemscribe
|
|
145
176
|
l[-1] = ""
|
146
177
|
restore_lf = true
|
147
178
|
end
|
179
|
+
|
148
180
|
l = @pre_processor.apply(l)
|
149
|
-
l
|
150
|
-
|
181
|
+
debug_context.preprocessor_output += l + "\n"
|
182
|
+
|
183
|
+
l = @processor.apply(l, debug_context)
|
184
|
+
debug_context.processor_output += l
|
185
|
+
|
151
186
|
l = @post_processor.apply(l, charset)
|
187
|
+
debug_context.postprocessor_output += l + "\n"
|
188
|
+
|
152
189
|
l += "\n" if restore_lf
|
153
190
|
l
|
154
191
|
}.join
|
@@ -156,24 +193,34 @@ module Glaemscribe
|
|
156
193
|
end
|
157
194
|
|
158
195
|
def transcribe(content, charset = nil)
|
196
|
+
debug_context = ModeDebugContext.new
|
159
197
|
if raw_mode
|
160
198
|
chunks = content.split(/({{.*?}})/m)
|
161
199
|
ret = ''
|
162
200
|
res = true
|
163
201
|
chunks.each{ |c|
|
164
202
|
if c =~ /{{(.*?)}}/m
|
165
|
-
succ, r = raw_mode.strict_transcribe($1,charset)
|
166
|
-
|
167
|
-
|
203
|
+
succ, r = raw_mode.strict_transcribe($1, charset, debug_context)
|
204
|
+
|
205
|
+
if !succ
|
206
|
+
return false, r, debug_context # Propagate error
|
207
|
+
end
|
208
|
+
|
209
|
+
ret += r
|
168
210
|
else
|
169
|
-
succ, r = strict_transcribe(c,charset)
|
170
|
-
|
171
|
-
|
211
|
+
succ, r = strict_transcribe(c,charset,debug_context)
|
212
|
+
|
213
|
+
if !succ
|
214
|
+
return false, r, debug_context # Propagate error
|
215
|
+
end
|
216
|
+
|
217
|
+
ret += r
|
172
218
|
end
|
173
219
|
}
|
174
|
-
return res,ret
|
220
|
+
return res, ret, debug_context
|
175
221
|
else
|
176
|
-
strict_transcribe(content,charset)
|
222
|
+
succ, r = strict_transcribe(content, charset, debug_context)
|
223
|
+
return succ, r, debug_context
|
177
224
|
end
|
178
225
|
end
|
179
226
|
|