glaemscribe 1.1.14 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/glaemscribe +21 -17
- data/glaemresources/charsets/cirth_ds.cst +540 -0
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +776 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +14 -1
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +66 -19
- data/lib/api/mode_parser.rb +117 -14
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +15 -12
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +36 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
- data/lib_espeak/glaemscribe_tts.js +505 -0
- metadata +76 -24
data/lib/api/charset.rb
CHANGED
@@ -30,11 +30,11 @@ module Glaemscribe
|
|
30
30
|
attr_reader :virtual_chars
|
31
31
|
|
32
32
|
class Char
|
33
|
-
attr_accessor :line
|
34
|
-
attr_accessor :code
|
35
|
-
attr_accessor :names
|
36
|
-
attr_accessor :str
|
37
|
-
attr_accessor :charset
|
33
|
+
attr_accessor :line # Line num in the sourcecode
|
34
|
+
attr_accessor :code # Position in unicode
|
35
|
+
attr_accessor :names # Names
|
36
|
+
attr_accessor :str # How does this char resolve as a string
|
37
|
+
attr_accessor :charset # Pointer to parent charset
|
38
38
|
|
39
39
|
def initialize
|
40
40
|
@names = {}
|
@@ -43,9 +43,13 @@ module Glaemscribe
|
|
43
43
|
def virtual?
|
44
44
|
false
|
45
45
|
end
|
46
|
+
|
47
|
+
def sequence?
|
48
|
+
false
|
49
|
+
end
|
46
50
|
end
|
47
51
|
|
48
|
-
class VirtualChar
|
52
|
+
class VirtualChar # Could have had inheritance here ...
|
49
53
|
attr_accessor :line
|
50
54
|
attr_accessor :names
|
51
55
|
attr_accessor :classes
|
@@ -121,6 +125,45 @@ module Glaemscribe
|
|
121
125
|
def virtual?
|
122
126
|
true
|
123
127
|
end
|
128
|
+
|
129
|
+
def sequence?
|
130
|
+
false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class SequenceChar
|
135
|
+
attr_accessor :line # Line of code
|
136
|
+
attr_accessor :names # Names
|
137
|
+
attr_accessor :sequence # The sequence of chars
|
138
|
+
attr_accessor :charset # Pointer to parent charset
|
139
|
+
|
140
|
+
def virtual?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def sequence?
|
145
|
+
true
|
146
|
+
end
|
147
|
+
|
148
|
+
def str
|
149
|
+
# A sequence char should never arrive unreplaced
|
150
|
+
VIRTUAL_CHAR_OUTPUT
|
151
|
+
end
|
152
|
+
|
153
|
+
def finalize
|
154
|
+
if @sequence.count == 0
|
155
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence for sequence char is empty.")
|
156
|
+
end
|
157
|
+
|
158
|
+
@sequence.each{ |symbol|
|
159
|
+
# Check that the sequence is correct
|
160
|
+
found = @charset[symbol]
|
161
|
+
if !found
|
162
|
+
@charset.errors << Glaeml::Error.new(@line, "Sequence char #{symbol} cannot be found in the charset.")
|
163
|
+
end
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
124
167
|
end
|
125
168
|
|
126
169
|
def initialize(name)
|
@@ -156,10 +199,21 @@ module Glaemscribe
|
|
156
199
|
@chars << c
|
157
200
|
end
|
158
201
|
|
202
|
+
def add_sequence_char(line, names, seq)
|
203
|
+
return if names.empty? || names.include?("?") # Ignore characters with '?'
|
204
|
+
|
205
|
+
c = SequenceChar.new
|
206
|
+
c.line = line
|
207
|
+
c.names = names
|
208
|
+
c.sequence = seq.split.reject{|token| token.empty? }
|
209
|
+
c.charset = self
|
210
|
+
@chars << c
|
211
|
+
end
|
212
|
+
|
159
213
|
def finalize
|
160
214
|
@errors = []
|
161
215
|
@lookup_table = {}
|
162
|
-
@virtual_chars = []
|
216
|
+
@virtual_chars = [] # A convenient filtered array
|
163
217
|
|
164
218
|
@chars.each { |c|
|
165
219
|
c.names.each { |cname|
|
@@ -179,6 +233,12 @@ module Glaemscribe
|
|
179
233
|
end
|
180
234
|
}
|
181
235
|
|
236
|
+
@chars.each{|c|
|
237
|
+
if c.class == SequenceChar
|
238
|
+
c.finalize
|
239
|
+
end
|
240
|
+
}
|
241
|
+
|
182
242
|
API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
|
183
243
|
end
|
184
244
|
|
data/lib/api/charset_parser.rb
CHANGED
@@ -47,6 +47,13 @@ module Glaemscribe
|
|
47
47
|
names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
48
48
|
@charset.add_char(char_element.line,code,names)
|
49
49
|
}
|
50
|
+
|
51
|
+
doc.root_node.gpath("seq").each{ |seq_elemnt|
|
52
|
+
names = seq_elemnt.args
|
53
|
+
child_node = seq_elemnt.children.first
|
54
|
+
seq = (child_node && child_node.text?)?(child_node.args.first):("")
|
55
|
+
@charset.add_sequence_char(seq_elemnt.line,names,seq)
|
56
|
+
}
|
50
57
|
|
51
58
|
doc.root_node.gpath("virtual").each { |virtual_element|
|
52
59
|
names = virtual_element.args
|
@@ -57,7 +64,13 @@ module Glaemscribe
|
|
57
64
|
virtual_element.gpath("class").each { |class_element|
|
58
65
|
vc = Charset::VirtualChar::VirtualClass.new
|
59
66
|
vc.target = class_element.args[0]
|
60
|
-
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
67
|
+
vc.triggers = class_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
|
68
|
+
|
69
|
+
# Allow triggers to be defined inside the body of the class element
|
70
|
+
text_lines = class_element.children.select { |c| c.text? }.map{ |c| c.args.first}
|
71
|
+
inner_triggers = text_lines.join(" ").split(/\s/).select{ |e| e != '' }
|
72
|
+
vc.triggers += inner_triggers
|
73
|
+
|
61
74
|
classes << vc
|
62
75
|
}
|
63
76
|
virtual_element.gpath("reversed").each { |reversed_element|
|
data/lib/api/constants.rb
CHANGED
@@ -23,11 +23,10 @@
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
25
|
WORD_BREAKER = "|"
|
26
|
-
WORD_BOUNDARY = "_"
|
27
|
-
|
28
|
-
SPECIAL_CHAR_UNDERSCORE = '➊'
|
29
|
-
SPECIAL_CHAR_NBSP = '➋'
|
30
26
|
|
27
|
+
WORD_BOUNDARY_LANG = "_"
|
28
|
+
WORD_BOUNDARY_TREE = "\u0000"
|
29
|
+
|
31
30
|
UNKNOWN_CHAR_OUTPUT = "☠"
|
32
31
|
VIRTUAL_CHAR_OUTPUT = "☢" # When transcribing a virtual char...
|
33
32
|
end
|
data/lib/api/fragment.rb
CHANGED
@@ -41,7 +41,7 @@ module Glaemscribe
|
|
41
41
|
EQUIVALENCE_RX_OUT = /(\(.*?\))/
|
42
42
|
EQUIVALENCE_RX_IN = /\((.*?)\)/
|
43
43
|
|
44
|
-
# Should pass a fragment expression, e.g. : "h(a
|
44
|
+
# Should pass a fragment expression, e.g. : "h(a,ä)(i,ï)"
|
45
45
|
def initialize(sheaf, expression)
|
46
46
|
@sheaf = sheaf
|
47
47
|
@mode = sheaf.mode
|
@@ -49,16 +49,16 @@ module Glaemscribe
|
|
49
49
|
@expression = expression
|
50
50
|
|
51
51
|
# Split the fragment, turn it into an array of arrays, e.g. [[h],[a,ä],[i,ï]]
|
52
|
-
equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }
|
52
|
+
equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }.reject{ |eq| eq == '' }
|
53
53
|
equivalences = equivalences.map{ |eq|
|
54
54
|
eq =~ EQUIVALENCE_RX_IN
|
55
55
|
if $1
|
56
56
|
eq = $1.split(EQUIVALENCE_SEPARATOR,-1).map{ |elt|
|
57
57
|
elt = elt.strip
|
58
|
-
elt.split(/\s/)
|
59
|
-
}
|
58
|
+
elt.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }
|
59
|
+
}
|
60
60
|
else
|
61
|
-
eq = [eq.split(/\s/)] # This equivalence has only one possibility
|
61
|
+
eq = [eq.split(/\s/).map{ |leaf| finalize_fragment_leaf(leaf) }] # This equivalence has only one possibility
|
62
62
|
end
|
63
63
|
}
|
64
64
|
|
@@ -87,6 +87,7 @@ module Glaemscribe
|
|
87
87
|
# Calculate all combinations for this fragment (productize the array of arrays)
|
88
88
|
res = equivalences[0]
|
89
89
|
|
90
|
+
# ((eq0 x eq1) x eq2) x eq3 ) ... )))))
|
90
91
|
(equivalences.length-1).times { |i|
|
91
92
|
prod = res.product(equivalences[i+1]).map{ |x,y| x+y}
|
92
93
|
res = prod
|
@@ -95,6 +96,26 @@ module Glaemscribe
|
|
95
96
|
@combinations = res
|
96
97
|
end
|
97
98
|
|
99
|
+
def finalize_fragment_leaf(leaf)
|
100
|
+
if src?
|
101
|
+
|
102
|
+
# Replace {UNI_XXXX} by its value to allow any unicode char to be found in the transcription tree
|
103
|
+
leaf = leaf.gsub(RuleGroup::UNICODE_VAR_NAME_REGEXP_OUT) { |cap_var|
|
104
|
+
unival = $1
|
105
|
+
new_char = [unival.hex].pack("U")
|
106
|
+
new_char = "\u0001" if new_char == '_'
|
107
|
+
new_char
|
108
|
+
}
|
109
|
+
|
110
|
+
# Replace '_' (word boundary) by '\u0000' to allow
|
111
|
+
# the real underscore to be used in the transcription tree
|
112
|
+
# (Do it after replacing the uni_xxx vars because they have underscores inside)
|
113
|
+
leaf = leaf.gsub(WORD_BOUNDARY_LANG, WORD_BOUNDARY_TREE)
|
114
|
+
leaf = leaf.gsub("\u0001","_")
|
115
|
+
end
|
116
|
+
|
117
|
+
leaf
|
118
|
+
end
|
98
119
|
|
99
120
|
def p
|
100
121
|
ret = "---- " + @expression + "\n"
|
data/lib/api/if_tree.rb
CHANGED
@@ -24,14 +24,36 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
module IfTree
|
26
26
|
|
27
|
+
# A branching if condition
|
27
28
|
class IfCond
|
28
29
|
attr_accessor :line, :expression, :parent_if_term, :child_code_block
|
29
30
|
def initialize(line, parent_if_term, expression)
|
30
31
|
@parent_if_term = parent_if_term
|
31
32
|
@expression = expression
|
32
33
|
end
|
34
|
+
def offset
|
35
|
+
parent_if_term.offset + " "
|
36
|
+
end
|
37
|
+
def prefix
|
38
|
+
offset + "|-"
|
39
|
+
end
|
40
|
+
def inspect
|
41
|
+
"#{prefix} IF #{expression}\n" +
|
42
|
+
"#{child_code_block.inspect}"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# A line of code
|
47
|
+
class CodeLine
|
48
|
+
attr_accessor :expression, :line
|
49
|
+
def initialize(expression, line)
|
50
|
+
@expression = expression
|
51
|
+
@line = line
|
52
|
+
end
|
33
53
|
end
|
34
54
|
|
55
|
+
# A node (code lines / preprocessor operators / ... )
|
56
|
+
# A node may have children or not depending on their nature
|
35
57
|
class Term
|
36
58
|
attr_accessor :parent_code_block
|
37
59
|
def initialize(parent_code_block)
|
@@ -43,24 +65,30 @@ module Glaemscribe
|
|
43
65
|
def is_pre_post_processor_operators?
|
44
66
|
false
|
45
67
|
end
|
68
|
+
def is_macro_deploy?
|
69
|
+
false
|
70
|
+
end
|
71
|
+
def offset
|
72
|
+
parent_code_block.offset + " "
|
73
|
+
end
|
74
|
+
def prefix
|
75
|
+
offset + "|- "
|
76
|
+
end
|
46
77
|
end
|
47
78
|
|
79
|
+
# A ifterm may have multiple ifconds (if,elsif,elsif,...,else)
|
48
80
|
class IfTerm < Term
|
49
81
|
attr_accessor :if_conds
|
50
82
|
def initialize(parent_code_block)
|
51
83
|
super(parent_code_block)
|
52
84
|
@if_conds = []
|
53
85
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
attr_accessor :expression, :line
|
58
|
-
def initialize(expression, line)
|
59
|
-
@expression = expression
|
60
|
-
@line = line
|
86
|
+
def inspect
|
87
|
+
"#{prefix} CONDITIONAL BLOCK\n" +
|
88
|
+
@if_conds.map{ |c| c.inspect }.join("\n")
|
61
89
|
end
|
62
90
|
end
|
63
|
-
|
91
|
+
|
64
92
|
class PrePostProcessorOperatorsTerm < Term
|
65
93
|
attr_accessor :operators
|
66
94
|
def initialize(parent_code_block)
|
@@ -70,6 +98,9 @@ module Glaemscribe
|
|
70
98
|
def is_pre_post_processor_operators?
|
71
99
|
true
|
72
100
|
end
|
101
|
+
def inspect
|
102
|
+
"#{prefix} OPERATORS (#{@operators.count})"
|
103
|
+
end
|
73
104
|
end
|
74
105
|
|
75
106
|
class CodeLinesTerm < Term
|
@@ -81,6 +112,25 @@ module Glaemscribe
|
|
81
112
|
def is_code_lines?
|
82
113
|
true
|
83
114
|
end
|
115
|
+
def inspect
|
116
|
+
"#{prefix} CODE LINES (#{@code_lines.count})"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class MacroDeployTerm < Term
|
121
|
+
attr_accessor :macro, :line, :arg_value_expressions
|
122
|
+
def initialize(macro, line, parent_code_block, arg_value_expressions)
|
123
|
+
super(parent_code_block)
|
124
|
+
@line = line
|
125
|
+
@macro = macro
|
126
|
+
@arg_value_expressions = arg_value_expressions
|
127
|
+
end
|
128
|
+
def is_macro_deploy?
|
129
|
+
true
|
130
|
+
end
|
131
|
+
def inspect
|
132
|
+
"#{prefix} MACRO DEPLOY (#{macro.name})"
|
133
|
+
end
|
84
134
|
end
|
85
135
|
|
86
136
|
class CodeBlock
|
@@ -89,6 +139,18 @@ module Glaemscribe
|
|
89
139
|
@parent_if_cond = parent_if_cond
|
90
140
|
@terms = []
|
91
141
|
end
|
142
|
+
def offset
|
143
|
+
((parent_if_cond)?(parent_if_cond.offset):("")) + " "
|
144
|
+
end
|
145
|
+
def prefix
|
146
|
+
offset + "|- "
|
147
|
+
end
|
148
|
+
def inspect
|
149
|
+
ret = ""
|
150
|
+
ret += "|-ROOT\n" if !parent_if_cond
|
151
|
+
ret += "#{prefix} Code block\n" +
|
152
|
+
@terms.map{|t| t.inspect}.join("\n")
|
153
|
+
end
|
92
154
|
end
|
93
155
|
|
94
156
|
end
|
data/lib/api/macro.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
class Macro
|
26
|
+
attr_reader :name, :rule_group, :mode, :arg_names
|
27
|
+
|
28
|
+
attr_reader :root_code_block
|
29
|
+
|
30
|
+
def initialize(rule_group,name,arg_names)
|
31
|
+
@rule_group = rule_group
|
32
|
+
@mode = rule_group.mode
|
33
|
+
@name = name
|
34
|
+
@arg_names = arg_names
|
35
|
+
@root_code_block = IfTree::CodeBlock.new
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/api/mode.rb
CHANGED
@@ -22,6 +22,23 @@
|
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
+
|
26
|
+
class ModeDebugContext
|
27
|
+
attr_accessor :preprocessor_output,
|
28
|
+
:processor_pathes,
|
29
|
+
:processor_output,
|
30
|
+
:postprocessor_output,
|
31
|
+
:tts_output
|
32
|
+
|
33
|
+
def initialize
|
34
|
+
@preprocessor_output = ""
|
35
|
+
@processor_pathes = []
|
36
|
+
@processor_output = []
|
37
|
+
@postprocessor_output = ""
|
38
|
+
@tts_output = ""
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
25
42
|
class Mode
|
26
43
|
|
27
44
|
attr_accessor :errors
|
@@ -41,6 +58,9 @@ module Glaemscribe
|
|
41
58
|
|
42
59
|
attr_accessor :world, :invention
|
43
60
|
|
61
|
+
attr_accessor :has_tts
|
62
|
+
attr_reader :current_tts_voice
|
63
|
+
|
44
64
|
attr_reader :latest_option_values
|
45
65
|
|
46
66
|
def initialize(name)
|
@@ -50,6 +70,8 @@ module Glaemscribe
|
|
50
70
|
@supported_charsets = {}
|
51
71
|
@options = {}
|
52
72
|
@last_raw_options = nil
|
73
|
+
@has_tts = false
|
74
|
+
@current_tts_voice = nil
|
53
75
|
|
54
76
|
@pre_processor = TranscriptionPreProcessor.new(self)
|
55
77
|
@processor = TranscriptionProcessor.new(self)
|
@@ -95,7 +117,7 @@ module Glaemscribe
|
|
95
117
|
|
96
118
|
trans_options_converted = {}
|
97
119
|
|
98
|
-
# Do a conversion to values space
|
120
|
+
# Do a conversion from names to values space
|
99
121
|
trans_options.each{ |oname,valname|
|
100
122
|
trans_options_converted[oname] = @options[oname].value_for_value_name(valname)
|
101
123
|
}
|
@@ -117,7 +139,13 @@ module Glaemscribe
|
|
117
139
|
@processor.finalize(@latest_option_values)
|
118
140
|
|
119
141
|
raw_mode.finalize options if raw_mode
|
120
|
-
|
142
|
+
|
143
|
+
# Update the current espeak voice
|
144
|
+
if @has_tts
|
145
|
+
espeak_option = @options['espeak_voice'].value_name_for_value(@latest_option_values['espeak_voice'])
|
146
|
+
@current_tts_voice = TTS.option_name_to_voice(espeak_option)
|
147
|
+
end
|
148
|
+
|
121
149
|
self
|
122
150
|
end
|
123
151
|
|
@@ -128,16 +156,19 @@ module Glaemscribe
|
|
128
156
|
@raw_mode = loaded_raw_mode.deep_clone
|
129
157
|
end
|
130
158
|
|
131
|
-
def
|
132
|
-
l.
|
133
|
-
gsub("_",SPECIAL_CHAR_UNDERSCORE).
|
134
|
-
gsub("\u00a0",SPECIAL_CHAR_NBSP)
|
135
|
-
end
|
136
|
-
|
137
|
-
def strict_transcribe(content, charset = nil)
|
159
|
+
def strict_transcribe(content, charset, debug_context)
|
138
160
|
charset = default_charset if !charset
|
139
161
|
return false, "*** No charset usable for transcription. Failed!" if !charset
|
140
162
|
|
163
|
+
if has_tts
|
164
|
+
begin
|
165
|
+
content = TTS.ipa(content, @current_tts_voice, (raw_mode != nil) )['ipa']
|
166
|
+
debug_context.tts_output += content
|
167
|
+
rescue StandardError => e
|
168
|
+
return false, "TTS pre-transcription failed : #{e}."
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
141
172
|
# Parser works line by line
|
142
173
|
ret = content.lines.map{ |l|
|
143
174
|
restore_lf = false
|
@@ -145,10 +176,16 @@ module Glaemscribe
|
|
145
176
|
l[-1] = ""
|
146
177
|
restore_lf = true
|
147
178
|
end
|
179
|
+
|
148
180
|
l = @pre_processor.apply(l)
|
149
|
-
l
|
150
|
-
|
181
|
+
debug_context.preprocessor_output += l + "\n"
|
182
|
+
|
183
|
+
l = @processor.apply(l, debug_context)
|
184
|
+
debug_context.processor_output += l
|
185
|
+
|
151
186
|
l = @post_processor.apply(l, charset)
|
187
|
+
debug_context.postprocessor_output += l + "\n"
|
188
|
+
|
152
189
|
l += "\n" if restore_lf
|
153
190
|
l
|
154
191
|
}.join
|
@@ -156,24 +193,34 @@ module Glaemscribe
|
|
156
193
|
end
|
157
194
|
|
158
195
|
def transcribe(content, charset = nil)
|
196
|
+
debug_context = ModeDebugContext.new
|
159
197
|
if raw_mode
|
160
198
|
chunks = content.split(/({{.*?}})/m)
|
161
199
|
ret = ''
|
162
200
|
res = true
|
163
201
|
chunks.each{ |c|
|
164
202
|
if c =~ /{{(.*?)}}/m
|
165
|
-
succ, r = raw_mode.strict_transcribe($1,charset)
|
166
|
-
|
167
|
-
|
203
|
+
succ, r = raw_mode.strict_transcribe($1, charset, debug_context)
|
204
|
+
|
205
|
+
if !succ
|
206
|
+
return false, r, debug_context # Propagate error
|
207
|
+
end
|
208
|
+
|
209
|
+
ret += r
|
168
210
|
else
|
169
|
-
succ, r = strict_transcribe(c,charset)
|
170
|
-
|
171
|
-
|
211
|
+
succ, r = strict_transcribe(c,charset,debug_context)
|
212
|
+
|
213
|
+
if !succ
|
214
|
+
return false, r, debug_context # Propagate error
|
215
|
+
end
|
216
|
+
|
217
|
+
ret += r
|
172
218
|
end
|
173
219
|
}
|
174
|
-
return res,ret
|
220
|
+
return res, ret, debug_context
|
175
221
|
else
|
176
|
-
strict_transcribe(content,charset)
|
222
|
+
succ, r = strict_transcribe(content, charset, debug_context)
|
223
|
+
return succ, r, debug_context
|
177
224
|
end
|
178
225
|
end
|
179
226
|
|