glaemscribe 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
  module API
    # Marker strings shared by the rest of the API. They are frozen so that
    # no consumer can alter them in place for everybody else.
    #
    # NOTE(review): the exact role of each marker is defined by the code that
    # consumes it, which is not part of this file; the descriptions below are
    # inferred from the names only and should be confirmed.

    # Presumably marks a break inside a word.
    WORD_BREAKER        = "|".freeze

    # Presumably marks the beginning / end of a word.
    WORD_BOUNDARY       = "_".freeze

    # Presumably emitted in place of a character that cannot be transcribed.
    UNKNOWN_CHAR_OUTPUT = "☠".freeze
  end
end
|
data/lib/api/debug.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
module Glaemscribe
  module API
    # Minimal on/off debug logger. Messages are written with +puts+ (standard
    # output) and only while debugging has been switched on.
    module Debug

      # Start disabled. The flag is initialised explicitly so that reading it
      # in +log+ never touches an unset instance variable (which warns under
      # `ruby -w` on Ruby < 3.0). The guard keeps the current setting if this
      # file is loaded again.
      @debug_mode = false unless defined?(@debug_mode)

      # Switches debug output on (truthy value) or off (false / nil).
      def self.enabled=(b)
        @debug_mode = b
      end

      # Returns true when debug output is currently switched on.
      def self.enabled?
        @debug_mode ? true : false
      end

      # Prints +s+ when debugging is enabled; does nothing otherwise.
      def self.log(s)
        puts s if @debug_mode
      end

    end
  end
end
|
data/lib/api/eval.rb
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
  module API
    # Small expression evaluator: a Lexer cuts an expression string into
    # Tokens and a recursive-descent Parser evaluates them on the fly.
    #
    # Supported: integer / float constants, quoted strings, true / false / nil,
    # named variables, unary - and !, parentheses, and the binary operators
    # * / %  +  -  <= < >= > == !=  &&  || (listed from tightest to loosest).
    module Eval

      # Base class of every error raised by this module.
      class IfEvalError < StandardError
      end

      # Raised by the Lexer when the input does not start with any known token.
      class UnknownToken < IfEvalError
      end

      # Raised by the Parser on malformed expressions or unknown constants.
      # (Inside this module it shadows Ruby's own ::SyntaxError.)
      class SyntaxError < IfEvalError
      end

      # A token definition (name + literal or Regexp to match) which, once
      # cloned by the Lexer, also carries the text that was actually matched.
      class Token
        attr_reader :name, :expression
        attr_accessor :value

        # name       : token kind, e.g. "add_plus"
        # expression : String (literal match) or Regexp
        def initialize(name, expression)
          @name       = name
          @expression = expression
          @value      = nil
        end

        # True when this token is matched with a Regexp rather than a literal.
        def regexp?
          @expression.is_a? Regexp
        end

        # Returns a copy of this token whose value is set to +value+.
        def clone(value=nil)
          t = super()
          t.value = value
          t
        end
      end

      # Cuts an expression into tokens, one per call to #advance.
      class Lexer
        attr_reader :exp, :token_chain

        # Order matters: the first definition matching the head of the input
        # wins, so two-character operators come before their one-character
        # prefixes. Regexps are anchored with \A (start of string); a ^ anchor
        # would also match at the start of a later line and silently skip
        # whatever precedes it.
        EXP_TOKENS = [
          Token.new("bool_or",      "||"),
          Token.new("bool_and",     "&&"),
          Token.new("cond_inf_eq",  "<="),
          Token.new("cond_inf",     "<"),
          Token.new("cond_sup_eq",  ">="),
          Token.new("cond_sup",     ">"),
          Token.new("cond_eq",      "=="),
          Token.new("cond_not_eq",  "!="),
          Token.new("add_plus",     "+"),
          Token.new("add_minus",    "-"),
          Token.new("mult_times",   "*"),
          Token.new("mult_div",     "/"),
          Token.new("mult_modulo",  "%"),
          Token.new("prim_not",     "!"),
          Token.new("prim_lparen",  "("),
          Token.new("prim_rparen",  ")"),
          Token.new("prim_string",  /\A'[^']*'/),
          Token.new("prim_string",  /\A"[^"]*"/),
          Token.new("prim_const",   /\A[a-zA-Z0-9_.]+/)
        ]

        # Returned (repeatedly) once the whole input has been consumed.
        TOKEN_END = Token.new("prim_end","")

        # exp : the expression to tokenize. It is never modified.
        def initialize(exp)
          @exp          = exp
          @token_chain  = []
          @retain_last  = false
        end

        # Pushes the last token back: the next #advance returns it again.
        def uneat
          @retain_last = true
        end

        # Returns the next token and removes it from the remaining input.
        # Raises UnknownToken when the input starts with nothing recognizable.
        def advance
          # Non-destructive strip: the string handed to #initialize belongs to
          # the caller (and may be frozen), so it must not be altered in place.
          @exp = @exp.strip

          if @retain_last
            @retain_last = false
            return @token_chain.last
          end

          if @exp.empty?
            t = TOKEN_END.clone("")
            @token_chain << t
            return t
          end

          EXP_TOKENS.each do |token|
            if token.regexp?
              m = token.expression.match(@exp)
              next unless m
              @exp = m.post_match # Eat the token
              t = token.clone(m.to_s)
            else
              next unless @exp.start_with?(token.expression)
              @exp = @exp[token.expression.length..-1] # Eat the token
              t = token.clone(token.expression)
            end
            @token_chain << t
            return t
          end

          raise UnknownToken, "Unknown token near '#{@exp}'."
        end

      end

      # Recursive-descent evaluator. Each explore_* method handles one
      # precedence level, reads tokens until it meets one it does not own,
      # then pushes that token back (uneat) for the caller's level.
      class Parser

        # Evaluates +exp+ and returns its value.
        #
        # exp  : expression string
        # vars : Hash of variables usable in the expression; keys may be
        #        Strings or Symbols.
        #
        # Raises UnknownToken / SyntaxError (both IfEvalError) on bad input.
        # NOTE: evaluation stops after the first complete expression; tokens
        # left after it are not checked.
        def parse(exp, vars)
          @lexer = Lexer.new(exp)
          @vars  = {}
          vars.each { |k, v| @vars[k.to_s] = v } # Cast symbols
          parse_top_level
        end

        # Entry point of the grammar (also used for parenthesized groups).
        def parse_top_level
          explore_bool
        end

        # Level: a || b || ...
        # Every operand is evaluated, then combined with Ruby's || semantics
        # (first truthy operand, else the last one). && binds tighter, so
        # `false && true || true` is true.
        def explore_bool
          v = explore_and
          while @lexer.advance.name == 'bool_or'
            rhs = explore_and
            v = v || rhs
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level: a && b && ...
        # Every operand is evaluated, then combined with Ruby's && semantics.
        def explore_and
          v = explore_compare
          while @lexer.advance.name == 'bool_and'
            rhs = explore_compare
            v = v && rhs
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end
        private :explore_and

        # Level: comparisons, evaluated left to right.
        def explore_compare
          v = explore_add
          loop do
            case @lexer.advance.name
            when 'cond_inf_eq' then v = (v <= explore_add)
            when 'cond_inf'    then v = (v <  explore_add)
            when 'cond_sup_eq' then v = (v >= explore_add)
            when 'cond_sup'    then v = (v >  explore_add)
            when 'cond_eq'     then v = (v == explore_add)
            when 'cond_not_eq' then v = (v != explore_add)
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level: + and -
        def explore_add
          v = explore_mult
          loop do
            case @lexer.advance.name
            when 'add_plus'  then v += explore_mult
            when 'add_minus' then v -= explore_mult
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level: *, / and %
        def explore_mult
          v = explore_primary
          loop do
            case @lexer.advance.name
            when 'mult_times'  then v *= explore_primary
            when 'mult_div'    then v /= explore_primary
            when 'mult_modulo' then v %= explore_primary
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level: constants, quoted strings, unary operators and parentheses.
        def explore_primary
          token = @lexer.advance
          case token.name
          when 'prim_const'
            v = cast_constant(token.value)
          when 'prim_string'
            # The lexer guarantees a leading and a trailing quote: drop them.
            v = token.value[1...-1]
          when 'add_minus' # Allow the use of - as primary token for negative numbers
            v = -explore_primary
          when 'prim_not' # Allow the use of ! for booleans
            v = !explore_primary
          when 'prim_lparen'
            v = parse_top_level
            rtoken = @lexer.advance
            raise SyntaxError.new("Missing right parenthesis.") if rtoken.name != 'prim_rparen'
          else
            raise SyntaxError.new("Cannot understand: #{token.value}.")
          end
          v
        end

        # Returns the Float parsed from +const+, or false when it is not one.
        def constant_is_float?(const)
          Float(const)
        rescue StandardError
          false
        end

        # Returns the Integer parsed from +const+, or false when it is not one.
        def constant_is_int?(const)
          Integer(const)
        rescue StandardError
          false
        end

        # True when +const+ is wrapped in a matching pair of ' or " quotes.
        def constant_is_string?(const)
          return false if const.length < 2
          f = const[0]
          l = const[-1]
          f == l && (l == "'" || l == '"')
        end

        # Converts the text of a constant into its value: integer, float,
        # quoted string, true / false / nil, or a variable passed to #parse.
        # Raises SyntaxError when the constant is none of those (this includes
        # variables whose value is nil).
        def cast_constant(const)
          return const.to_i if constant_is_int?(const)
          return const.to_f if constant_is_float?(const)

          quoted = const.match(/^\'(.*)\'$/) || const.match(/^\"(.*)\"$/)
          return quoted[1] if quoted

          case const
          when 'true'  then return true
          when 'false' then return false
          when 'nil'   then return nil
          end

          return @vars[const] if @vars[const] != nil

          raise SyntaxError.new("Cannot understand constant '#{const}'.")
        end
      end
    end
  end
end
|
261
|
+
|
262
|
+
=begin
|
263
|
+
l = Glaemscribe::API::Eval::Parser.new
|
264
|
+
puts l.parse("2+2+2", {})
|
265
|
+
puts l.parse("1>2 && 2<3",{})
|
266
|
+
puts l.parse("option == OPTION",{:option => 2, :OPTION => 2})
|
267
|
+
puts l.parse("option == (OPTION && false)",{:option => 2, :OPTION => 2})
|
268
|
+
=end
|
data/lib/api/fragment.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
#
|
23
|
+
# A Fragment is a sequence of equivalences
|
24
|
+
# For example h(a,ä)(i,ï) represents the four combinations:
|
25
|
+
# hai, haï, häi, häï
|
26
|
+
|
27
|
+
module Glaemscribe
  module API
    # A Fragment is a sequence of equivalences written as one expression.
    # Parenthesized groups list alternatives separated by commas, so
    # "h(a,ä)(i,ï)" stands for the four combinations hai, haï, häi, häï.
    # Each combination is stored as an array of whitespace-separated tokens.
    class Fragment

      attr_reader :sheaf
      attr_reader :rule
      attr_reader :mode

      # Array of combinations (each one an array of tokens). Empty when the
      # expression was rejected; the reason is then appended to rule.errors.
      attr_reader :combinations

      # Both simply delegate to the owning sheaf.
      def src?; @sheaf.src?; end
      def dst?; @sheaf.dst?; end

      EQUIVALENCE_SEPARATOR = ","

      EQUIVALENCE_RX_OUT    = /(\(.*?\))/  # splits the expression, keeping the groups
      EQUIVALENCE_RX_IN     = /\((.*?)\)/  # extracts the inside of one group

      # sheaf      : owner of the fragment; must answer mode, rule, src? and dst?
      # expression : fragment expression, e.g. "h(a,ä)(i,ï)"
      #
      # Problems are not raised: a message is pushed to rule.errors and
      # combinations is left empty.
      def initialize(sheaf, expression)
        @sheaf        = sheaf
        @mode         = sheaf.mode
        @rule         = sheaf.rule
        @expression   = expression
        # Set before any early exit so that #combinations and #p stay usable
        # after a validation error (it used to be left nil).
        @combinations = []

        equivalences = parse_equivalences(expression)
        return if equivalences.nil?

        # In the case of a destination fragment, check that all symbols used
        # are found in the charsets used by the mode
        return if dst? && !symbols_supported?(equivalences)

        @combinations = combine(equivalences)
      end

      # Returns a multi-line dump of the expression and of its combinations.
      # (Note that, inside this class, this hides Kernel#p.)
      def p
        ret = "---- " + @expression + "\n"
        @combinations.each { |c| ret += "------ " + c.inspect + "\n" }
        ret
      end

      private

      # Turns the expression into an array of equivalences, each equivalence
      # being an array of members and each member an array of tokens,
      # e.g. "h(a,ä)" => [[["h"]], [["a"], ["ä"]]].
      # Returns nil (after recording the error) when a group has an empty member.
      def parse_equivalences(expression)
        chunks = expression.split(EQUIVALENCE_RX_OUT).map { |chunk| chunk.strip }

        equivalences = chunks.map do |chunk|
          group = EQUIVALENCE_RX_IN.match(chunk)
          if group
            # Limit -1 keeps trailing empty members so "(a,)" is detected.
            group[1].split(EQUIVALENCE_SEPARATOR, -1).map do |member|
              member = member.strip
              if member.empty?
                @rule.errors << "Null members are not allowed in equivalences!"
                return nil
              end
              member.split(/\s/)
            end
          else
            [chunk.split(/\s/)] # This equivalence has only one possibility
          end
        end

        equivalences.empty? ? [[[""]]] : equivalences
      end

      # Checks every non-empty token against every charset supported by the
      # mode. Records the first missing symbol in rule.errors and returns
      # false; returns true when everything was found.
      def symbols_supported?(equivalences)
        equivalences.each do |equivalence|
          equivalence.each do |member|
            member.each do |token|
              next if token.empty? # NULL case
              @sheaf.mode.supported_charsets.each do |_charset_name, charset|
                next if charset[token]
                @rule.errors << "Symbol #{token} not found in charset '#{charset.name}'!"
                return false
              end
            end
          end
        end
        true
      end

      # Calculates all combinations (cartesian product of the equivalences,
      # concatenating the token arrays picked from each one).
      def combine(equivalences)
        equivalences.drop(1).inject(equivalences.first) do |acc, equivalence|
          acc.product(equivalence).map { |left, right| left + right }
        end
      end

    end
  end
end
|