glaemscribe 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/glaemscribe +2 -2
- data/glaemresources/charsets/cirth_ds.cst +514 -179
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
- data/glaemresources/charsets/tengwar_freemono.cst +1 -1
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
- data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
- data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +9 -4
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/lib/api/charset.rb +124 -57
- data/lib/api/charset_parser.rb +39 -26
- data/lib/api/mode.rb +35 -10
- data/lib/api/mode_parser.rb +21 -12
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +41 -19
- data/lib/api/rule_group.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +51 -45
- data/lib/api/transcription_processor.rb +12 -9
- data/lib/glaemscribe.rb +2 -0
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
- data/lib_espeak/glaemscribe_tts.js +363 -223
- metadata +12 -6
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
# A post processor operator to replace the out_space on the fly.
|
24
|
+
# This has the same effect as the \outspace parameter
|
25
|
+
# but, being a post-processor operator, it can be wrapped in the if/then logic.
|
26
|
+
|
27
|
+
module Glaemscribe
|
28
|
+
module API
|
29
|
+
|
30
|
+
# Post-processor operator that replaces the post-processor's out_space
# token list with the tokens given as the operator's first argument.
class OutspacePostProcessorOperator < PostProcessorOperator
  # mode           - the mode owning this operator (kept in @mode by the superclass).
  # glaeml_element - the element whose first argument is a whitespace-separated
  #                  list of token names.
  def initialize(mode, glaeml_element)
    super(mode, glaeml_element)
    space_tokens = glaeml_element.args[0].split.reject(&:empty?)
    # The token list is pushed to the mode's post processor right away,
    # and again every time the operator is applied.
    @mode.post_processor.out_space = space_tokens
    @out_space = space_tokens
  end

  # Installs the stored out_space on the post processor and hands the
  # tokens back untouched; the charset is not used.
  def apply(tokens, charset)
    @mode.post_processor.out_space = @out_space
    tokens
  end
end
|
41
|
+
|
42
|
+
ResourceManager::register_post_processor_class("outspace", OutspacePostProcessorOperator)
|
43
|
+
end
|
44
|
+
end
|
@@ -1,22 +1,22 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
@@ -24,25 +24,25 @@ module Glaemscribe
|
|
24
24
|
module API
|
25
25
|
|
26
26
|
class ResolveVirtualsPostProcessorOperator < PostProcessorOperator
|
27
|
-
|
27
|
+
|
28
28
|
# Finalizes the operator for the given transcription options (delegating
# to the superclass) and creates the empty table in which the last
# trigger seen for each virtual char is recorded.
def finalize(trans_options)
  super(trans_options)
  @last_triggers = {} # Allocate the lookup here to optimize
end
|
32
|
-
|
32
|
+
|
33
33
|
# Forgets the last trigger recorded for every virtual char of the charset.
# One state entry is kept per virtual char; each is reset to nil.
def reset_trigger_states(charset)
  charset.virtual_chars.each do |virtual_char|
    @last_triggers.store(virtual_char, nil)
  end
end
|
39
|
-
|
39
|
+
|
40
40
|
def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
|
41
41
|
if token == '*SPACE' || token =='*LF'
|
42
42
|
reset_trigger_states(charset)
|
43
43
|
return
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
# Check if token is a virtual char
|
47
47
|
c = charset[token]
|
48
48
|
return if c.nil? # May happen for empty tokens
|
@@ -54,14 +54,14 @@ module Glaemscribe
|
|
54
54
|
token = new_tokens[idx] # Consider the token replaced, being itself a potential trigger for further virtuals (cascading virtuals)
|
55
55
|
end
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
# Update states of virtual classes
|
59
59
|
charset.virtual_chars.each{|vc|
|
60
60
|
rc = vc[token]
|
61
|
-
@last_triggers[vc] = rc if rc != nil
|
61
|
+
@last_triggers[vc] = rc if rc != nil
|
62
62
|
}
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
def apply_sequences(charset,tokens)
|
66
66
|
ret = []
|
67
67
|
tokens.each { |token|
|
@@ -74,21 +74,43 @@ module Glaemscribe
|
|
74
74
|
}
|
75
75
|
ret
|
76
76
|
end
|
77
|
-
|
77
|
+
|
78
|
+
# Walks the token list from left to right and, whenever a token is a swap
# trigger whose swap accepts the following token as a target, exchanges the
# two tokens in place. A trigger that has just been moved one slot to the
# right is examined again on the next step, so it can keep moving past
# several consecutive targets.
#
# charset - must answer swap_for_trigger(token) with a swap object or nil.
# tokens  - array of tokens; modified in place and returned.
def apply_swaps(charset, tokens)
  last_pair_start = tokens.length - 2
  0.upto(last_pair_start) do |pos|
    current   = tokens[pos]
    following = tokens[pos + 1]
    swap      = charset.swap_for_trigger(current)
    next unless swap && swap.has_target?(following)

    tokens[pos], tokens[pos + 1] = following, current
  end
  tokens
end
|
97
|
+
|
78
98
|
def apply(tokens,charset)
|
79
99
|
# Apply sequence chars
|
80
100
|
tokens = apply_sequences(charset,tokens)
|
81
|
-
|
101
|
+
|
102
|
+
tokens = apply_swaps(charset, tokens)
|
103
|
+
|
82
104
|
# Clone the tokens so that we can perform ligatures AND diacritics without interferences
|
83
105
|
new_tokens = tokens.clone
|
84
|
-
|
106
|
+
|
85
107
|
# Handle l to r virtuals (diacritics ?)
|
86
|
-
reset_trigger_states(charset)
|
108
|
+
reset_trigger_states(charset)
|
87
109
|
tokens.each_with_index{ |token,idx|
|
88
110
|
apply_loop(charset,tokens,new_tokens,false,token,idx)
|
89
111
|
}
|
90
112
|
# Handle r to l virtuals (ligatures ?)
|
91
|
-
reset_trigger_states(charset)
|
113
|
+
reset_trigger_states(charset)
|
92
114
|
tokens.reverse_each.with_index{ |token,idx|
|
93
115
|
apply_loop(charset,tokens,new_tokens,true,token,tokens.count - 1 - idx)
|
94
116
|
}
|
@@ -96,7 +118,7 @@ module Glaemscribe
|
|
96
118
|
end
|
97
119
|
end
|
98
120
|
|
99
|
-
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
121
|
+
ResourceManager::register_post_processor_class("resolve_virtuals", ResolveVirtualsPostProcessorOperator)
|
100
122
|
|
101
123
|
end
|
102
124
|
end
|
data/lib/api/rule_group.rb
CHANGED
@@ -138,7 +138,7 @@ module Glaemscribe
|
|
138
138
|
var_value = apply_vars(term.line, var_value_ex, true)
|
139
139
|
|
140
140
|
if !var_value
|
141
|
-
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{
|
141
|
+
@mode.errors << Glaeml::Error.new(term.line, "Thus, variable {#{arg_name}} could not be declared.")
|
142
142
|
end
|
143
143
|
end
|
144
144
|
|
@@ -1,45 +1,46 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
#
|
3
3
|
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
-
# the transcription of texts between writing systems, and more
|
5
|
-
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
6
|
# invented languages to some of his devised writing systems.
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# This program is free software: you can redistribute it and/or modify
|
11
11
|
# it under the terms of the GNU Affero General Public License as published by
|
12
12
|
# the Free Software Foundation, either version 3 of the License, or
|
13
13
|
# any later version.
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# This program is distributed in the hope that it will be useful,
|
16
16
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
17
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
18
|
# GNU Affero General Public License for more details.
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# You should have received a copy of the GNU Affero General Public License
|
21
21
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
22
|
|
23
23
|
module Glaemscribe
|
24
24
|
module API
|
25
|
-
|
25
|
+
|
26
26
|
class PrePostProcessorOperator
|
27
27
|
attr_reader :glaeml_element
|
28
28
|
attr_reader :finalized_glaeml_element
|
29
|
-
|
30
|
-
def initialize(glaeml_element)
|
29
|
+
|
30
|
+
# Remembers the owning mode and the glaeml element describing this operator.
def initialize(mode, glaeml_element)
  @mode, @glaeml_element = mode, glaeml_element
end
|
33
|
-
|
34
|
+
|
34
35
|
# Resolves one operator argument.
# nil stays nil. An argument containing a line that starts with "\eval"
# followed by whitespace has the text after that marker parsed by
# Eval::Parser with the transcription options. Any other argument is
# returned unchanged.
def eval_arg(arg, trans_options)
  return nil if arg.nil?
  return arg unless arg =~ /^\\eval\s/

  expression = Regexp.last_match.post_match
  Eval::Parser.new().parse(expression, trans_options)
end
|
42
|
-
|
43
|
+
|
43
44
|
def finalize_glaeml_element(ge, trans_options)
|
44
45
|
ge.args.map! { |arg| eval_arg(arg, trans_options) }
|
45
46
|
ge.children.each{ |child|
|
@@ -47,37 +48,37 @@ module Glaemscribe
|
|
47
48
|
}
|
48
49
|
ge
|
49
50
|
end
|
50
|
-
|
51
|
+
|
51
52
|
# Builds the finalized version of the operator's glaeml element: a clone of
# the stored element is passed through finalize_glaeml_element with the
# transcription options, and the result is kept in @finalized_glaeml_element.
def finalize(trans_options)
  working_copy = @glaeml_element.clone
  @finalized_glaeml_element = finalize_glaeml_element(working_copy, trans_options)
end
|
54
|
-
|
55
|
+
|
55
56
|
# Abstract hook: concrete operators must override this method.
#
# Accepts (and ignores) any arguments, because the overrides visible in
# this code base take one argument (pre-processor operators) or two
# (post-processor operators). With a zero-argument signature, a subclass
# that forgot to override would fail with an ArgumentError instead of the
# explanatory message below.
#
# Raises RuntimeError always.
def apply(*_args)
  raise "Pure virtual method, should be overloaded."
end
|
58
59
|
end
|
59
|
-
|
60
|
+
|
60
61
|
class TranscriptionPrePostProcessor
|
61
62
|
attr_reader :root_code_block
|
62
|
-
|
63
|
+
|
63
64
|
attr_reader :operators
|
64
|
-
|
65
|
+
|
65
66
|
# Creates the processor for a mode, starting with an empty root code block
# of the if-tree.
def initialize(mode)
  @root_code_block = IfTree::CodeBlock.new
  @mode = mode
end
|
69
|
-
|
70
|
+
|
70
71
|
def descend_if_tree(code_block, trans_options)
|
71
|
-
code_block.terms.each{ |term|
|
72
|
+
code_block.terms.each{ |term|
|
72
73
|
if(term.is_pre_post_processor_operators?)
|
73
74
|
term.operators.each{ |operator|
|
74
75
|
@operators << operator
|
75
|
-
}
|
76
|
+
}
|
76
77
|
else
|
77
78
|
term.if_conds.each{ |if_cond|
|
78
|
-
|
79
|
+
|
79
80
|
if_eval = Eval::Parser.new()
|
80
|
-
|
81
|
+
|
81
82
|
if(if_eval.parse(if_cond.expression, trans_options) == true)
|
82
83
|
descend_if_tree(if_cond.child_code_block, trans_options)
|
83
84
|
break
|
@@ -86,7 +87,7 @@ module Glaemscribe
|
|
86
87
|
end
|
87
88
|
}
|
88
89
|
end
|
89
|
-
|
90
|
+
|
90
91
|
def finalize(trans_options)
|
91
92
|
@operators = []
|
92
93
|
# Select operators depending on conditions
|
@@ -98,42 +99,47 @@ module Glaemscribe
|
|
98
99
|
end
|
99
100
|
|
100
101
|
end
|
101
|
-
|
102
|
-
class PreProcessorOperator < PrePostProcessorOperator
|
102
|
+
|
103
|
+
# Base class for pre-processor operators. It adds nothing of its own to
# PrePostProcessorOperator.
class PreProcessorOperator < PrePostProcessorOperator
end
|
104
|
-
|
105
|
+
|
105
106
|
# Base class for post-processor operators. It adds nothing of its own to
# PrePostProcessorOperator.
class PostProcessorOperator < PrePostProcessorOperator
end
|
107
|
-
|
108
|
-
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
108
|
+
|
109
|
+
# Pre-processor: runs the selected operators over the input, one after another.
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
  # Feeds l through every operator in order, each operator receiving the
  # previous operator's result, and returns the final value.
  def apply(l)
    @operators.reduce(l) { |current, operator| operator.apply(current) }
  end
end
|
118
|
-
|
119
|
+
|
119
120
|
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
120
|
-
|
121
|
+
|
121
122
|
attr_accessor :out_space
|
122
|
-
|
123
|
+
|
123
124
|
def apply(tokens, out_charset)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
|
126
|
+
# Cleanup the output of the chain by removing empty tokens
|
127
|
+
tokens.select!{ |tok| tok != "" }
|
128
|
+
|
128
129
|
# Apply filters
|
129
130
|
@operators.each{ |operator|
|
130
131
|
tokens = operator.apply(tokens,out_charset)
|
131
|
-
}
|
132
|
-
|
132
|
+
}
|
133
|
+
|
134
|
+
out_space_str = " "
|
135
|
+
out_space_str = @out_space.map{ |token|
|
136
|
+
out_charset[token]&.str || UNKNOWN_CHAR_OUTPUT
|
137
|
+
}.join("") if @out_space
|
138
|
+
|
133
139
|
# Convert output
|
134
140
|
ret = ""
|
135
141
|
tokens.each{ |token|
|
136
|
-
case token
|
142
|
+
case token
|
137
143
|
when ""
|
138
144
|
when "*UNKNOWN"
|
139
145
|
ret += UNKNOWN_CHAR_OUTPUT
|
@@ -142,13 +148,13 @@ module Glaemscribe
|
|
142
148
|
when "*LF"
|
143
149
|
ret += "\n"
|
144
150
|
else
|
145
|
-
c = out_charset[token]
|
151
|
+
c = out_charset[token]
|
146
152
|
ret += (c.nil?)?(UNKNOWN_CHAR_OUTPUT):c.str
|
147
|
-
end
|
153
|
+
end
|
148
154
|
}
|
149
155
|
ret
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
153
159
|
end
|
154
160
|
end
|
@@ -73,7 +73,7 @@ module Glaemscribe
|
|
73
73
|
}
|
74
74
|
end
|
75
75
|
|
76
|
-
def apply(l)
|
76
|
+
def apply(l, debug_context)
|
77
77
|
ret = []
|
78
78
|
current_group = nil
|
79
79
|
accumulated_word = ""
|
@@ -81,14 +81,14 @@ module Glaemscribe
|
|
81
81
|
l.split("").each{ |c|
|
82
82
|
case c
|
83
83
|
when " ", "\t"
|
84
|
-
ret += transcribe_word(accumulated_word)
|
84
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
85
85
|
ret += ["*SPACE"]
|
86
86
|
|
87
87
|
accumulated_word = ""
|
88
88
|
when "\r"
|
89
89
|
# Ignore
|
90
90
|
when "\n"
|
91
|
-
ret += transcribe_word(accumulated_word)
|
91
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
92
92
|
ret += ["*LF"]
|
93
93
|
|
94
94
|
accumulated_word = ""
|
@@ -97,24 +97,27 @@ module Glaemscribe
|
|
97
97
|
if c_group == current_group
|
98
98
|
accumulated_word += c
|
99
99
|
else
|
100
|
-
ret += transcribe_word(accumulated_word)
|
100
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
101
101
|
current_group = c_group
|
102
102
|
accumulated_word = c
|
103
103
|
end
|
104
104
|
end
|
105
105
|
}
|
106
106
|
# Just in case
|
107
|
-
ret += transcribe_word(accumulated_word)
|
107
|
+
ret += transcribe_word(accumulated_word, debug_context)
|
108
108
|
ret
|
109
109
|
end
|
110
110
|
|
111
|
-
def transcribe_word(word)
|
111
|
+
def transcribe_word(word, debug_context)
|
112
112
|
res = []
|
113
113
|
word = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE
|
114
114
|
while word.length != 0
  # The tree yields the tokens for the next match and the number of
  # characters of `word` that match covers.
  tokens, len = @transcription_tree.transcribe(word)
  # Capture the consumed prefix BEFORE advancing `word`; slicing after the
  # advance would record the following characters instead of the ones
  # that were actually matched.
  eaten = word[0, len]
  word  = word[len..-1]
  res  += tokens

  # Record what was consumed and what it produced for the debug context.
  debug_context.processor_pathes << [eaten, tokens, tokens]
end
|
119
122
|
# Return token list
|
120
123
|
res
|
data/lib/glaemscribe.rb
CHANGED
@@ -67,6 +67,8 @@ module Glaemscribe
|
|
67
67
|
require API_PATH + "pre_processor/substitute.rb"
|
68
68
|
require API_PATH + "pre_processor/rxsubstitute.rb"
|
69
69
|
require API_PATH + "pre_processor/up_down_tehta_split.rb"
|
70
|
+
|
71
|
+
require API_PATH + "post_processor/outspace.rb"
|
70
72
|
require API_PATH + "post_processor/reverse.rb"
|
71
73
|
require API_PATH + "post_processor/resolve_virtuals.rb"
|
72
74
|
|