glaemscribe 1.2.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/glaemscribe +2 -2
- data/glaemresources/charsets/cirth_ds.cst +514 -179
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
- data/glaemresources/charsets/tengwar_freemono.cst +1 -1
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
- data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
- data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +9 -4
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/lib/api/charset.rb +124 -57
- data/lib/api/charset_parser.rb +39 -26
- data/lib/api/mode.rb +35 -10
- data/lib/api/mode_parser.rb +21 -12
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +41 -19
- data/lib/api/rule_group.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +51 -45
- data/lib/api/transcription_processor.rb +12 -9
- data/lib/glaemscribe.rb +2 -0
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
- data/lib_espeak/glaemscribe_tts.js +363 -223
- metadata +12 -6
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
# A post processor operator to replace the out_space on the fly.
|
24
|
+
# This has the same effect as the \outspace parameter
|
25
|
+
# But can be included in the postprocessor and benefit from the if/then logic
|
26
|
+
|
27
|
+
module Glaemscribe
|
28
|
+
module API
|
29
|
+
|
30
|
+
class OutspacePostProcessorOperator < PostProcessorOperator
|
31
|
+
def initialize(mode, glaeml_element)
|
32
|
+
super(mode, glaeml_element)
|
33
|
+
@out_space = @mode.post_processor.out_space = glaeml_element.args[0].split.reject{|token| token.empty? }
|
34
|
+
end
|
35
|
+
|
36
|
+
def apply(tokens, charset)
|
37
|
+
@mode.post_processor.out_space = @out_space
|
38
|
+
tokens
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
|
43
|
+
end
|
44
|
+
end
|
@@ -1,22 +1,22 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
@@ -24,25 +24,25 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
|
26
26
|
class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
|
27
|
-
|
27
|
+
|
28
28
|
def finalize(trans_options)
|
29
29
|
super(trans_options)
|
30
30
|
@last_triggers = {} # Allocate the lookup here to optimize
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def reset_trigger_states(charset)
|
34
34
|
# For each virtual char in charset, maintain a state.
|
35
35
|
charset.virtual_chars.each{ |vc|
|
36
36
|
@last_triggers[vc] = nil # Clear the state
|
37
37
|
}
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
|
41
41
|
if token == '*SPACE' || token =='*LF'
|
42
42
|
reset_trigger_states(charset)
|
43
43
|
return
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
# Check if token is a virtual char
|
47
47
|
c = charset[token]
|
48
48
|
return if c.nil? # May happen for empty tokens
|
@@ -54,14 +54,14 @@ module Glaemscribe
|
|
54
54
|
token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
|
55
55
|
end
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
# Update states of virtual classes
|
59
59
|
charset.virtual_chars.each{|vc|
|
60
60
|
rc = vc[token]
|
61
|
-
@last_triggers[vc] = rc if rc != nil
|
61
|
+
@last_triggers[vc] = rc if rc != nil
|
62
62
|
}
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
def apply_sequences(charset,tokens)
|
66
66
|
ret = []
|
67
67
|
tokens.each { |token|
|
@@ -74,21 +74,43 @@ module Glaemscribe
|
|
74
74
|
}
|
75
75
|
ret
|
76
76
|
end
|
77
|
-
|
77
|
+
|
78
|
+
def apply_swaps(charset, tokens)
|
79
|
+
|
80
|
+
idx = 0
|
81
|
+
while idx < tokens.length - 1
|
82
|
+
tok = tokens[idx]
|
83
|
+
tgt = tokens[idx+1]
|
84
|
+
|
85
|
+
trig = charset.swap_for_trigger(tok)
|
86
|
+
|
87
|
+
if trig && trig.has_target?(tgt)
|
88
|
+
tokens[idx+1] = tok
|
89
|
+
tokens[idx] = tgt
|
90
|
+
end
|
91
|
+
|
92
|
+
idx += 1
|
93
|
+
end
|
94
|
+
|
95
|
+
tokens
|
96
|
+
end
|
97
|
+
|
78
98
|
def apply(tokens,charset)
|
79
99
|
# Apply sequence chars
|
80
100
|
tokens = apply_sequences(charset,tokens)
|
81
|
-
|
101
|
+
|
102
|
+
tokens = apply_swaps(charset, tokens)
|
103
|
+
|
82
104
|
# Clone the tokens so that we can perform ligatures AND diacritics without interferences
|
83
105
|
new_tokens = tokens.clone
|
84
|
-
|
106
|
+
|
85
107
|
# Handle l to r virtuals (diacritics ?)
|
86
|
-
reset_trigger_states(charset)
|
108
|
+
reset_trigger_states(charset)
|
87
109
|
tokens.each_with_index{ |token,idx|
|
88
110
|
apply_loop(charset,tokens,new_tokens,false,token,idx)
|
89
111
|
}
|
90
112
|
# Handle r to l virtuals (ligatures ?)
|
91
|
-
reset_trigger_states(charset)
|
113
|
+
reset_trigger_states(charset)
|
92
114
|
tokens.reverse_each.with_index{ |token,idx|
|
93
115
|
apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
|
94
116
|
}
|
@@ -96,7 +118,7 @@ module Glaemscribe
|
|
96
118
|
end
|
97
119
|
end
|
98
120
|
|
99
|
-
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
121
|
+
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
100
122
|
|
101
123
|
end
|
102
124
|
end
|
data/lib/api/rule_group.rb
CHANGED
@@ -138,7 +138,7 @@ module Glaemscribe
|
|
138
138
|
var_value = apply_vars(term.line, var_value_ex, true)
|
139
139
|
|
140
140
|
if !var_value
|
141
|
-
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{
|
141
|
+
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
|
142
142
|
end
|
143
143
|
end
|
144
144
|
|
@@ -1,45 +1,46 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
-
|
25
|
+
|
26
26
|
class PrePostProcessorOperator
|
27
27
|
attr_reader :glaeml_element
|
28
28
|
attr_reader :finalized_glaeml_element
|
29
|
-
|
30
|
-
def initialize(glaeml_element)
|
29
|
+
|
30
|
+
def initialize(mode, glaeml_element)
|
31
|
+
@mode = mode
|
31
32
|
@glaeml_element = glaeml_element
|
32
33
|
end
|
33
|
-
|
34
|
+
|
34
35
|
def eval_arg(arg, trans_options)
|
35
36
|
return nil if arg.nil?
|
36
37
|
if arg =~ /^\\eval\s/
|
37
38
|
to_eval = $'
|
38
39
|
return Eval::Parser.new().parse(to_eval, trans_options)
|
39
40
|
end
|
40
|
-
return arg
|
41
|
+
return arg
|
41
42
|
end
|
42
|
-
|
43
|
+
|
43
44
|
def finalize_glaeml_element(ge, trans_options)
|
44
45
|
ge.args.map! { |arg| eval_arg(arg, trans_options) }
|
45
46
|
ge.children.each{ |child|
|
@@ -47,37 +48,37 @@ module Glaemscribe
|
|
47
48
|
}
|
48
49
|
ge
|
49
50
|
end
|
50
|
-
|
51
|
+
|
51
52
|
def finalize(trans_options)
|
52
53
|
@finalized_glaeml_element = finalize_glaeml_element(@glaeml_element.clone, trans_options)
|
53
54
|
end
|
54
|
-
|
55
|
+
|
55
56
|
def apply
|
56
57
|
raise "Pure virtual method, should be overloaded."
|
57
58
|
end
|
58
59
|
end
|
59
|
-
|
60
|
+
|
60
61
|
class TranscriptionPrePostProcessor
|
61
62
|
attr_reader :root_code_block
|
62
|
-
|
63
|
+
|
63
64
|
attr_reader :operators
|
64
|
-
|
65
|
+
|
65
66
|
def initialize(mode)
|
66
67
|
@mode = mode
|
67
|
-
@root_code_block = IfTree::CodeBlock.new
|
68
|
+
@root_code_block = IfTree::CodeBlock.new
|
68
69
|
end
|
69
|
-
|
70
|
+
|
70
71
|
def descend_if_tree(code_block, trans_options)
|
71
|
-
code_block.terms.each{ |term|
|
72
|
+
code_block.terms.each{ |term|
|
72
73
|
if(term.is_pre_post_processor_operators?)
|
73
74
|
term.operators.each{ |operator|
|
74
75
|
@operators << operator
|
75
|
-
}
|
76
|
+
}
|
76
77
|
else
|
77
78
|
term.if_conds.each{ |if_cond|
|
78
|
-
|
79
|
+
|
79
80
|
if_eval = Eval::Parser.new()
|
80
|
-
|
81
|
+
|
81
82
|
if(if_eval.parse(if_cond.expression, trans_options) == true)
|
82
83
|
descend_if_tree(if_cond.child_code_block, trans_options)
|
83
84
|
break
|
@@ -86,7 +87,7 @@ module Glaemscribe
|
|
86
87
|
end
|
87
88
|
}
|
88
89
|
end
|
89
|
-
|
90
|
+
|
90
91
|
def finalize(trans_options)
|
91
92
|
@operators = []
|
92
93
|
# Select operators depending on conditions
|
@@ -98,42 +99,47 @@ module Glaemscribe
|
|
98
99
|
end
|
99
100
|
|
100
101
|
end
|
101
|
-
|
102
|
-
class PreProcessorOperator < PrePostProcessorOperator
|
102
|
+
|
103
|
+
class PreProcessorOperator < PrePostProcessorOperator
|
103
104
|
end
|
104
|
-
|
105
|
+
|
105
106
|
class PostProcessorOperator < PrePostProcessorOperator
|
106
107
|
end
|
107
|
-
|
108
|
-
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
108
|
+
|
109
|
+
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
109
110
|
# Apply all preprocessor rules consecutively
|
110
111
|
def apply(l)
|
111
112
|
ret = l
|
112
113
|
@operators.each{ |operator|
|
113
114
|
ret = operator.apply(ret)
|
114
|
-
}
|
115
|
+
}
|
115
116
|
ret
|
116
117
|
end
|
117
118
|
end
|
118
|
-
|
119
|
+
|
119
120
|
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
120
|
-
|
121
|
+
|
121
122
|
attr_accessor :out_space
|
122
|
-
|
123
|
+
|
123
124
|
def apply(tokens, out_charset)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
|
126
|
+
# Cleanup the output of the chain by removing empty tokens
|
127
|
+
tokens.select!{ |tok| tok != "" }
|
128
|
+
|
128
129
|
# Apply filters
|
129
130
|
@operators.each{ |operator|
|
130
131
|
tokens = operator.apply(tokens,out_charset)
|
131
|
-
}
|
132
|
-
|
132
|
+
}
|
133
|
+
|
134
|
+
out_space_str = " "
|
135
|
+
out_space_str = @out_space.map{ |token|
|
136
|
+
out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
|
137
|
+
}.join("") if @out_space
|
138
|
+
|
133
139
|
# Convert output
|
134
140
|
ret = ""
|
135
141
|
tokens.each{ |token|
|
136
|
-
case token
|
142
|
+
case token
|
137
143
|
when ""
|
138
144
|
when "*UNKNOWN"
|
139
145
|
ret += UNKNOWN_CHAR_OUTPUT
|
@@ -142,13 +148,13 @@ module Glaemscribe
|
|
142
148
|
when "*LF"
|
143
149
|
ret += "\n"
|
144
150
|
else
|
145
|
-
c = out_charset[token]
|
151
|
+
c = out_charset[token]
|
146
152
|
ret += (c.nil?)?(UNKNOWN_CHAR_OUTPUT):c.str
|
147
|
-
end
|
153
|
+
end
|
148
154
|
}
|
149
155
|
ret
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
153
159
|
end
|
154
160
|
end
|
@@ -73,7 +73,7 @@ module Glaemscribe
|
|
73
73
|
}
|
74
74
|
end
|
75
75
|
|
76
|
-
def apply(l)
|
76
|
+
def apply(l, debug_context)
|
77
77
|
ret = []
|
78
78
|
current_group = nil
|
79
79
|
accumulated_word = ""
|
@@ -81,14 +81,14 @@ module Glaemscribe
|
|
81
81
|
l.split("").each{ |c|
|
82
82
|
case c
|
83
83
|
when " ", "\t"
|
84
|
-
ret += transcribe_word(accumulated_word)
|
84
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
85
85
|
ret += ["*SPACE"]
|
86
86
|
|
87
87
|
accumulated_word = ""
|
88
88
|
when "\r"
|
89
89
|
# Ignore
|
90
90
|
when "\n"
|
91
|
-
ret += transcribe_word(accumulated_word)
|
91
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
92
92
|
ret += ["*LF"]
|
93
93
|
|
94
94
|
accumulated_word = ""
|
@@ -97,24 +97,27 @@ module Glaemscribe
|
|
97
97
|
if c_group == current_group
|
98
98
|
accumulated_word += c
|
99
99
|
else
|
100
|
-
ret += transcribe_word(accumulated_word)
|
100
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
101
101
|
current_group = c_group
|
102
102
|
accumulated_word = c
|
103
103
|
end
|
104
104
|
end
|
105
105
|
}
|
106
106
|
# Just in case
|
107
|
-
ret += transcribe_word(accumulated_word)
|
107
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
108
108
|
ret
|
109
109
|
end
|
110
110
|
|
111
|
-
def transcribe_word(word)
|
111
|
+
def transcribe_word(word, debug_context)
|
112
112
|
res = []
|
113
113
|
word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
|
114
114
|
while word.length != 0
|
115
|
-
|
116
|
-
word
|
117
|
-
|
115
|
+
tokens, len = @transcription_tree.transcribe(word)
|
116
|
+
word = word[len..-1]
|
117
|
+
eaten = word[0..len-1]
|
118
|
+
res += tokens
|
119
|
+
|
120
|
+
debug_context.processor_pathes << [eaten, tokens, tokens]
|
118
121
|
end
|
119
122
|
# Return token list
|
120
123
|
res
|
data/lib/glaemscribe.rb
CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
|
|
67
67
|
require API_PATH + "pre_processor/substitute.rb"
|
68
68
|
require API_PATH + "pre_processor/rxsubstitute.rb"
|
69
69
|
require API_PATH + "pre_processor/up_down_tehta_split.rb"
|
70
|
+
|
71
|
+
require API_PATH + "post_processor/outspace.rb"
|
70
72
|
require API_PATH + "post_processor/reverse.rb"
|
71
73
|
require API_PATH + "post_processor/resolve_virtuals.rb"
|
72
74
|
|