glaemscribe 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API
    # Shared string markers of the API. They are frozen so that no caller
    # can accidentally alter them in place for everybody else.

    # Value of the word breaker marker.
    WORD_BREAKER        = "|".freeze
    # Value of the word boundary marker.
    WORD_BOUNDARY       = "_".freeze
    # Placeholder output for an unknown character (per the constant's name).
    UNKNOWN_CHAR_OUTPUT = "☠".freeze
  end
end
data/lib/api/debug.rb ADDED
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
module Glaemscribe
  module API
    # Minimal switchable logger: messages are printed with `puts` only
    # while debugging has been enabled through `Debug.enabled = true`.
    module Debug

      # Logging is off until explicitly enabled.
      @debug_mode = false

      # Turns debug logging on (truthy value) or off (false / nil).
      def self.enabled=(b)
        @debug_mode = b
      end

      # Tells whether debug logging is currently on.
      def self.enabled?
        @debug_mode ? true : false
      end

      # Prints +s+ on the standard output, but only in debug mode.
      def self.log(s)
        puts s if @debug_mode
      end

    end
  end
end
data/lib/api/eval.rb ADDED
@@ -0,0 +1,268 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API
    # Small expression evaluator: a Lexer cuts the expression into tokens
    # and a recursive descent Parser evaluates it on the fly.
    #
    # Supported: || && <= < >= > == != + - * / % ! ( ), quoted strings,
    # numbers, true / false / nil and named variables.
    module Eval

      # Base class of all the errors raised by the evaluator.
      class IfEvalError < StandardError
      end

      # Raised by the lexer when the remaining text matches no known token.
      class UnknownToken < IfEvalError
      end

      # Raised by the parser when the token sequence cannot be understood.
      class SyntaxError < IfEvalError
      end

      # A token definition (name + literal or regexp expression). Clones of
      # a definition carry the text actually matched in #value.
      class Token
        attr_reader :name, :expression
        attr_accessor :value

        def initialize(name, expression)
          @name       = name
          @expression = expression
          @value      = nil
        end

        # True when the token is recognized through a regexp rather than
        # through a literal string.
        def regexp?
          @expression.is_a? Regexp
        end

        # Returns a copy of the token holding +value+ as matched text.
        def clone(value=nil)
          t = super()
          t.value = value
          t
        end
      end

      # Cuts an expression into tokens, one token per call to #advance.
      class Lexer
        attr_reader :exp, :token_chain

        # Order matters: the first matching definition wins, so the longer
        # operators ("<=", "!=") are listed before their prefixes ("<", "!").
        # Regexps are anchored with \A (not ^) so that they can only match
        # at the very beginning of the remaining text, even when it spans
        # several lines.
        EXP_TOKENS = [
          Token.new("bool_or",      "||"),
          Token.new("bool_and",     "&&"),
          Token.new("cond_inf_eq",  "<="),
          Token.new("cond_inf",     "<"),
          Token.new("cond_sup_eq",  ">="),
          Token.new("cond_sup",     ">"),
          Token.new("cond_eq",      "=="),
          Token.new("cond_not_eq",  "!="),
          Token.new("add_plus",     "+"),
          Token.new("add_minus",    "-"),
          Token.new("mult_times",   "*"),
          Token.new("mult_div",     "/"),
          Token.new("mult_modulo",  "%"),
          Token.new("prim_not",     "!"),
          Token.new("prim_lparen",  "("),
          Token.new("prim_rparen",  ")"),
          Token.new("prim_string",  /\A'[^']*'/),
          Token.new("prim_string",  /\A"[^"]*"/),
          Token.new("prim_const",   /\A[a-zA-Z0-9_.]+/)
        ]

        # Token returned once the whole expression has been consumed.
        TOKEN_END = Token.new("prim_end","")

        def initialize(exp)
          @exp         = exp
          @token_chain = []
          @retain_last = false
        end

        # Pushes back the last token: the next #advance returns it again.
        def uneat
          @retain_last = true
        end

        # Returns the next token of the expression.
        # Raises UnknownToken when the remaining text matches no definition.
        def advance
          # Non destructive strip: the string handed over by the caller is
          # never modified, and may therefore be frozen.
          @exp = @exp.strip

          if @retain_last
            @retain_last = false
            return @token_chain.last
          end

          return emit(TOKEN_END.clone("")) if @exp == TOKEN_END.expression

          EXP_TOKENS.each do |token|
            if token.regexp?
              md = token.expression.match(@exp)
              next unless md
              @exp = md.post_match # Eat the token
              return emit(token.clone(md[0]))
            elsif @exp.start_with?(token.expression)
              @exp = @exp[token.expression.length..-1] # Eat the token
              return emit(token.clone(token.expression))
            end
          end

          raise UnknownToken, "Unknown token near '#{@exp}'."
        end

        private

        # Records +token+ in the chain of tokens read so far and returns it.
        def emit(token)
          @token_chain << token
          token
        end

      end

      # Evaluates an expression while parsing it.
      #
      # Precedence, from lowest to highest:
      #   bool (|| &&), compare, add (+ -), mult (* / %), primary
      class Parser

        # Evaluates +exp+ (String) and returns the resulting value.
        # +vars+ maps variable names (strings or symbols) to their values.
        # Raises UnknownToken or SyntaxError on malformed expressions.
        def parse(exp, vars)
          @lexer = Lexer.new(exp)
          @vars  = vars.each_with_object({}) { |(k, v), h| h[k.to_s] = v } # Cast symbols
          parse_top_level
        end

        def parse_top_level
          explore_bool
        end

        # NOTE: when the left operand already decides the result, the rest
        # of the boolean chain is still parsed (and evaluated) through a
        # recursive call whose value is discarded. "||" and "&&" share this
        # single precedence level.
        def explore_bool
          v = explore_compare
          loop do
            case @lexer.advance.name
            when 'bool_or'
              if v
                explore_bool
              else
                v = explore_compare
              end
            when 'bool_and'
              if !v
                explore_bool
              else
                v = explore_compare
              end
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        def explore_compare
          v = explore_add
          loop do
            case @lexer.advance.name
            when 'cond_inf_eq' then v = (v <= explore_add)
            when 'cond_inf'    then v = (v <  explore_add)
            when 'cond_sup_eq' then v = (v >= explore_add)
            when 'cond_sup'    then v = (v >  explore_add)
            when 'cond_eq'     then v = (v == explore_add)
            when 'cond_not_eq' then v = (v != explore_add)
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        def explore_add
          v = explore_mult
          loop do
            case @lexer.advance.name
            when 'add_plus'  then v += explore_mult
            when 'add_minus' then v -= explore_mult
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        def explore_mult
          v = explore_primary
          loop do
            case @lexer.advance.name
            when 'mult_times'  then v *= explore_primary
            when 'mult_div'    then v /= explore_primary
            when 'mult_modulo' then v %= explore_primary
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        def explore_primary
          token = @lexer.advance
          case token.name
          when 'prim_const'
            v = cast_constant(token.value)
          when 'prim_string'
            # The lexer guarantees one quote at each end: drop them.
            v = token.value[1..-2]
          when 'add_minus' # Allow the use of - as primary token for negative numbers
            v = -explore_primary
          when 'prim_not' # Allow the use of ! for booleans
            v = !explore_primary
          when 'prim_lparen'
            v      = parse_top_level
            rtoken = @lexer.advance
            raise SyntaxError, "Missing right parenthesis." if rtoken.name != 'prim_rparen'
          else
            raise SyntaxError, "Cannot understand: #{token.value}."
          end
          v
        end

        # Returns the converted Float (truthy) or false.
        def constant_is_float?(const)
          Float(const) rescue false
        end

        # Returns the converted Integer (truthy) or false.
        def constant_is_int?(const)
          Integer(const) rescue false
        end

        # True when +const+ is enclosed in a pair of identical quotes.
        def constant_is_string?(const)
          return false if const.length < 2
          f = const[0]
          l = const[-1]
          return ( f == l && (l == "'" || l == '"') )
        end

        # Turns the text of a constant into a value: number, string,
        # true / false / nil, or the value of the variable of that name.
        # Raises SyntaxError when the constant is none of those.
        #
        # NOTE: recognition relies on Integer() / Float() but conversion on
        # to_i / to_f, which disagree on non decimal literals such as "0x10".
        def cast_constant(const)
          if constant_is_int?(const)
            const.to_i
          elsif constant_is_float?(const)
            const.to_f
          elsif const[/^\'(.*)\'$/] || const[/^\"(.*)\"$/]
            $1
          elsif const == 'true'
            true
          elsif const == 'false'
            false
          elsif const == 'nil'
            nil
          elsif(@vars[const] != nil)
            @vars[const]
          else
            raise SyntaxError, "Cannot understand constant '#{const}'."
          end
        end
      end
    end
  end
end
261
+
262
+ =begin
263
+ l = Glaemscribe::API::Eval::Parser.new
264
+ puts l.parse("2+2+2", {})
265
+ puts l.parse("1>2 && 2<3",{})
266
+ puts l.parse("option == OPTION",{:option => 2, :OPTION => 2})
267
+ puts l.parse("option == (OPTION && false)",{:option => 2, :OPTION => 2})
268
+ =end
@@ -0,0 +1,113 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+ #
23
+ # A Fragment is a sequence of equivalences
24
+ # For example h(a,ä)(i,ï) represents the four combinations:
25
+ # hai, haï, häi, häï
26
+
27
module Glaemscribe
  module API
    # A Fragment is a sequence of equivalences.
    # For example "h(a,e)(i,o)" stands for the four combinations
    # h a i, h a o, h e i and h e o.
    class Fragment

      attr_reader :sheaf
      attr_reader :rule
      attr_reader :mode
      # Array of combinations, each combination being an array of tokens.
      # Empty when the expression was rejected (see rule.errors).
      attr_reader :combinations

      def src?; @sheaf.src?; end
      def dst?; @sheaf.dst?; end

      EQUIVALENCE_SEPARATOR = ","

      EQUIVALENCE_RX_OUT    = /(\(.*?\))/
      EQUIVALENCE_RX_IN     = /\((.*?)\)/

      # Should pass a fragment expression, e.g. : "h(a,e)(i,o)"
      #
      # Problems are not raised: a message is appended to the errors of the
      # rule of +sheaf+ and the fragment is left without any combination.
      def initialize(sheaf, expression)
        @sheaf        = sheaf
        @mode         = sheaf.mode
        @rule         = sheaf.rule
        @expression   = expression
        # Always an array, so that the fragment stays usable (#p, iteration)
        # even when the expression is rejected below.
        @combinations = []

        equivalences = parse_equivalences(expression)
        return if equivalences.nil?

        # In the case of a destination fragment, check that all symbols used
        # are found in the charsets used by the mode
        return if dst? && !symbols_supported?(equivalences)

        # Calculate all combinations for this fragment (productize the array of arrays)
        @combinations = equivalences.drop(1).inject(equivalences.first) { |res, eq|
          res.product(eq).map{ |x,y| x+y }
        }
      end

      # Returns a printable dump of the expression and of its combinations.
      def p
        ret = "---- #{@expression}\n"
        @combinations.each{ |c| ret << "------ #{c.inspect}\n" }
        ret
      end

      private

      # Splits the expression and turns it into an array of arrays of token
      # arrays, e.g. "h(a,e)(i,o)" => [[[h]], [[a],[e]], [], [[i],[o]]].
      # Returns nil, after recording an error, on a null member.
      def parse_equivalences(expression)
        parts = expression.split(EQUIVALENCE_RX_OUT).map{ |part| part.strip }

        equivalences = parts.map{ |part|
          inner = part[EQUIVALENCE_RX_IN, 1]
          if inner
            inner.split(EQUIVALENCE_SEPARATOR,-1).map{ |member|
              member = member.strip
              if member.empty?
                @rule.errors << "Null members are not allowed in equivalences!"
                return nil
              end
              member.split(/\s/)
            }
          else
            [part.split(/\s/)] # This equivalence has only one possibility
          end
        }

        equivalences.empty? ? [[[""]]] : equivalences
      end

      # Checks that every non empty token is defined in every charset
      # supported by the mode. Records an error and returns false on the
      # first missing symbol.
      def symbols_supported?(equivalences)
        equivalences.flatten.each{ |token|
          next if token.empty? # NULL case
          @mode.supported_charsets.each{ |_charset_name, charset|
            next if charset[token]
            @rule.errors << "Symbol #{token} not found in charset '#{charset.name}'!"
            return false
          }
        }
        true
      end

    end
  end
end