glaemscribe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
  module API
    # Marker strings shared by the API.
    # NOTE(review): the exact role of each marker is decided by the code that
    # consumes these constants, which is not visible here -- confirm at the
    # call sites before relying on the descriptions below.

    # Presumably the marker used to break a word apart.
    WORD_BREAKER = '|'

    # Presumably the marker standing for a word boundary.
    WORD_BOUNDARY = '_'

    # Presumably what gets emitted for a character that cannot be handled.
    UNKNOWN_CHAR_OUTPUT = '☠'
  end
end
|
data/lib/api/debug.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
module Glaemscribe
  module API
    # Minimal debug logger: messages are printed only while the switch is on.
    # The switch is kept in an instance variable of the Debug module itself,
    # so it is off (nil) until `enabled=` is called with a truthy value.
    module Debug
      class << self
        # Turns debug output on (truthy) or off (false/nil).
        def enabled=(b)
          @debug_mode = b
        end

        # Prints `s` with `puts` when debug output is on; does nothing
        # otherwise. Always returns nil.
        def log(s)
          return unless @debug_mode

          puts s
        end
      end
    end
  end
end
|
data/lib/api/eval.rb
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
  module API
    # A small expression evaluator: a Lexer that cuts an expression string
    # into tokens, and a recursive-descent Parser that evaluates the tokens
    # while reading them.
    module Eval

      # Base class of the errors raised by the evaluator.
      class IfEvalError < StandardError
      end

      # Raised by Lexer#advance when the remaining input matches no token.
      class UnknownToken < IfEvalError
      end

      # Raised by the Parser when the token stream cannot be evaluated.
      # Note that, inside this namespace, the name shadows Ruby's ::SyntaxError.
      class SyntaxError < IfEvalError
      end

      # A token definition (name + literal or Regexp expression). Clones of a
      # definition additionally carry the text that was matched in the input.
      class Token
        attr_reader :name, :expression
        attr_accessor :value

        # name       - token name (e.g. "bool_or").
        # expression - String matched literally, or Regexp.
        def initialize(name, expression)
          @name       = name
          @expression = expression
          @value      = nil
        end

        # True when the token is recognised through a Regexp.
        def regexp?
          @expression.is_a? Regexp
        end

        # Returns a copy of this token whose value is set to `value`.
        def clone(value=nil)
          t = super()
          t.value = value
          t
        end
      end

      # Cuts an expression into tokens, one per call to #advance.
      class Lexer
        attr_reader :exp, :token_chain

        # Tried in order, first match wins: two-character operators must
        # therefore stay before their one-character prefixes.
        # Regexps are anchored with \A (start of the remaining input) and not
        # with ^ (start of any line), otherwise a token found on a later line
        # of a multi-line expression would silently swallow what precedes it.
        EXP_TOKENS = [
          Token.new("bool_or",      "||"),
          Token.new("bool_and",     "&&"),
          Token.new("cond_inf_eq",  "<="),
          Token.new("cond_inf",     "<"),
          Token.new("cond_sup_eq",  ">="),
          Token.new("cond_sup",     ">"),
          Token.new("cond_eq",      "=="),
          Token.new("cond_not_eq",  "!="),
          Token.new("add_plus",     "+"),
          Token.new("add_minus",    "-"),
          Token.new("mult_times",   "*"),
          Token.new("mult_div",     "/"),
          Token.new("mult_modulo",  "%"),
          Token.new("prim_not",     "!"),
          Token.new("prim_lparen",  "("),
          Token.new("prim_rparen",  ")"),
          Token.new("prim_string",  /\A'[^']*'/),
          Token.new("prim_string",  /\A"[^"]*"/),
          Token.new("prim_const",   /\A[a-zA-Z0-9_.]+/)
        ]

        # Returned (as a clone) once the whole input has been consumed.
        TOKEN_END = Token.new("prim_end","")

        # exp - the expression to tokenize. It is never modified.
        def initialize(exp)
          @exp          = exp
          @token_chain  = []
          @retain_last  = false
        end

        # Pushes the last token back: the next #advance returns it again.
        def uneat
          @retain_last = true
        end

        # Returns the next token (or the last one after #uneat).
        # Raises UnknownToken when the remaining input matches no token.
        def advance
          # Non-destructive strip: the string handed to the lexer belongs to
          # the caller and may be frozen.
          @exp = @exp.strip

          if @retain_last
            @retain_last = false
            return @token_chain.last
          end

          return push_token(TOKEN_END, "") if @exp == TOKEN_END.expression

          EXP_TOKENS.each do |token|
            if token.regexp?
              match = token.expression.match(@exp)
              next unless match
              @exp = match.post_match # Eat the token
              return push_token(token, match.to_s)
            elsif @exp.start_with?(token.expression)
              @exp = @exp[token.expression.length..-1] # Eat the token
              return push_token(token, token.expression)
            end
          end

          raise UnknownToken
        end

        private

        # Records a clone of `token` carrying `value` and returns it.
        def push_token(token, value)
          t = token.clone(value)
          @token_chain << t
          t
        end
      end

      # Evaluates expressions made of integers, floats, quoted strings,
      # true/false/nil, variables, parentheses and the operators listed in
      # Lexer::EXP_TOKENS.
      class Parser

        # Evaluates `exp` and returns its value.
        #
        # exp  - expression string.
        # vars - variables usable in the expression; keys are compared after
        #        being converted with to_s, so symbols and strings both work.
        #
        # Raises UnknownToken or SyntaxError when the expression is not
        # understood.
        # NOTE(review): tokens that follow a complete expression are not
        # checked here and are therefore ignored.
        def parse(exp, vars)
          @lexer = Lexer.new(exp)
          @vars  = {}
          vars.each{ |k,v| @vars[k.to_s] = v } # Cast symbols
          parse_top_level
        end

        def parse_top_level
          explore_bool
        end

        # Handles || and &&. Both operators sit on the same level and are
        # processed left to right. When the left operand already decides the
        # result, the rest of the boolean expression is still parsed (so its
        # errors are still raised) but its value is dropped.
        # NOTE(review): because of this, `a && b || c` does not follow the
        # usual precedence rules when `a` is false.
        def explore_bool
          v = explore_compare
          loop do
            case @lexer.advance().name
            when 'bool_or'
              if v
                explore_bool
              else
                v = explore_compare
              end
            when 'bool_and'
              if !v
                explore_bool
              else
                v = explore_compare
              end
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Handles the comparison operators.
        def explore_compare
          v = explore_add
          loop do
            case @lexer.advance().name
            when 'cond_inf_eq' then v = (v <= explore_add)
            when 'cond_inf'    then v = (v <  explore_add)
            when 'cond_sup_eq' then v = (v >= explore_add)
            when 'cond_sup'    then v = (v >  explore_add)
            when 'cond_eq'     then v = (v == explore_add)
            when 'cond_not_eq' then v = (v != explore_add)
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Handles + and -.
        def explore_add
          v = explore_mult
          loop do
            case @lexer.advance().name
            when 'add_plus'  then v += explore_mult
            when 'add_minus' then v -= explore_mult
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Handles *, / and %.
        def explore_mult
          v = explore_primary
          loop do
            case @lexer.advance().name
            when 'mult_times'  then v *= explore_primary
            when 'mult_div'    then v /= explore_primary
            when 'mult_modulo' then v %= explore_primary
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Handles constants, quoted strings, unary - and !, and parentheses.
        def explore_primary
          token = @lexer.advance()
          case token.name
          when 'prim_const', 'prim_string'
            # Quoted strings are produced by the lexer as 'prim_string' and
            # unwrapped by cast_constant.
            v = cast_constant(token.value)
          when 'add_minus' # Allow the use of - as primary token for negative numbers
            v = -explore_primary
          when 'prim_not' # Allow the use of ! for booleans
            v = !explore_primary
          when 'prim_lparen'
            v      = parse_top_level
            rtoken = @lexer.advance()
            raise SyntaxError.new("Missing right parenthesis.") if(rtoken.name != 'prim_rparen')
          else raise SyntaxError.new("Cannot understand: #{token.value}.")
          end
          v
        end

        # Returns the Float read from `const`, or false when it is not one.
        def constant_is_float?(const)
          Float(const)
        rescue StandardError
          false
        end

        # Returns the Integer read from `const`, or false when it is not one.
        def constant_is_int?(const)
          Integer(const)
        rescue StandardError
          false
        end

        # True when `const` is wrapped in a pair of identical quotes (' or ").
        def constant_is_string?(const)
          return false if const.length < 2
          f = const[0]
          l = const[-1]
          return ( f == l && (l == "'" || l == '"') )
        end

        # Converts the text of a constant into its value: Integer, Float,
        # String (quotes removed), true, false, nil, or the value of the
        # variable bearing that name.
        # Raises SyntaxError when the text is none of these; a variable whose
        # value is nil is treated as unknown.
        def cast_constant(const)
          if constant_is_int?(const)
            integer_value(const)
          elsif (float = constant_is_float?(const))
            # Use the value that was validated: to_f would turn literals such
            # as "0x1p3" into 0.0.
            float
          elsif constant_is_string?(const)
            const[1..-2]
          elsif const == 'true'
            true
          elsif const == 'false'
            false
          elsif const == 'nil'
            nil
          elsif(@vars[const] != nil)
            @vars[const]
          else
            raise SyntaxError.new("Cannot understand constant '#{const}'.")
          end
        end

        private

        # Value of a constant already accepted by constant_is_int?.
        # Decimal is tried first so that "010" keeps meaning 10; literals with
        # a radix prefix (0x10, 0b11, ...) then get the value Integer() reads,
        # where to_i would have returned 0.
        def integer_value(const)
          Integer(const, 10)
        rescue ArgumentError
          Integer(const)
        end
      end
    end
  end
end
|
261
|
+
|
262
|
+
=begin
|
263
|
+
l = Glaemscribe::API::Eval::Parser.new
|
264
|
+
puts l.parse("2+2+2", {})
|
265
|
+
puts l.parse("1>2 && 2<3",{})
|
266
|
+
puts l.parse("option == OPTION",{:option => 2, :OPTION => 2})
|
267
|
+
puts l.parse("option == (OPTION && false)",{:option => 2, :OPTION => 2})
|
268
|
+
=end
|
data/lib/api/fragment.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
#
|
23
|
+
# A Fragment is a sequence of equivalences
|
24
|
+
# For example h(a,ä)(i,ï) represents the four combinations:
|
25
|
+
# hai, haï, häi, häï
|
26
|
+
|
27
|
+
module Glaemscribe
  module API
    # A Fragment is a sequence of equivalences; members of an equivalence are
    # written between parentheses and separated by EQUIVALENCE_SEPARATOR.
    # For example h(a,ä)(i,ï) stands for the four combinations
    # hai, haï, häi, häï.
    class Fragment

      attr_reader :sheaf
      attr_reader :rule
      attr_reader :mode
      # Array of combinations (each one an array of tokens), or nil when the
      # expression was rejected and an error was added to rule.errors.
      attr_reader :combinations

      def src?; @sheaf.src?; end
      def dst?; @sheaf.dst?; end

      EQUIVALENCE_SEPARATOR = ","

      # Used with split: isolates the "(...)" groups, parentheses included.
      EQUIVALENCE_RX_OUT    = /(\(.*?\))/
      # Captures what is written between the parentheses of a group.
      EQUIVALENCE_RX_IN     = /\((.*?)\)/

      # Builds the fragment and computes all of its combinations.
      #
      # sheaf      - owner of the fragment; must respond to mode, rule, src?
      #              and dst?.
      # expression - fragment expression, e.g. "h(a,ä)(i,ï)".
      #
      # On an invalid expression, a message is appended to rule.errors and
      # the construction stops: `combinations` is then nil.
      def initialize(sheaf, expression)
        @sheaf        = sheaf
        @mode         = sheaf.mode
        @rule         = sheaf.rule
        @expression   = expression
        @combinations = nil

        # Split the fragment, turn it into an array of arrays, e.g. [[h],[a,ä],[i,ï]]
        equivalences = expression.split(EQUIVALENCE_RX_OUT).map{ |eq| eq.strip }
        equivalences = equivalences.map{ |eq|
          group = eq[EQUIVALENCE_RX_IN, 1]
          if group
            # -1 keeps trailing empty members so that "(a,)" is detected
            group.split(EQUIVALENCE_SEPARATOR,-1).map{ |elt|
              elt = elt.strip
              if elt.empty?
                @rule.errors << "Null members are not allowed in equivalences!"
                return
              end
              elt.split(/\s/)
            }
          else
            [eq.split(/\s/)] # This equivalence has only one possibility
          end
        }

        equivalences = [[[""]]] if equivalences.empty?

        # In the case of a destination fragment, check that all symbols used are found
        # in the charsets used by the mode
        if dst?
          equivalences.each{ |eq|
            eq.each{ |member|
              member.each{ |token|
                next if token.empty? # NULL case
                @mode.supported_charsets.each{ |_charset_name, charset|
                  next if charset[token]
                  @rule.errors << "Symbol #{token} not found in charset '#{charset.name}'!"
                  return
                }
              }
            }
          }
        end

        # Calculate all combinations for this fragment (productize the array of arrays)
        @combinations = equivalences.drop(1).inject(equivalences[0]) { |res, eq|
          res.product(eq).map{ |x,y| x+y }
        }
      end

      # Returns a printable dump of the expression and of its combinations.
      # A fragment whose construction failed has no combinations: only the
      # expression line is returned in that case.
      def p
        ret = "---- " + @expression + "\n"
        (@combinations || []).each{ |c|
          ret += "------ " + c.inspect + "\n"
        }
        ret
      end

    end
  end
end
|