glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API
    # Shared marker strings for the API namespace.
    # The comments below paraphrase the constant names only; how each marker
    # is consumed is not visible in this file (NOTE(review): confirm against
    # the transcription code).
    #
    # All three are frozen so that no caller can alter a shared marker in
    # place for the whole process.

    # Marker named as a "word breaker".
    WORD_BREAKER = "|".freeze

    # Marker named as a "word boundary".
    WORD_BOUNDARY = "_".freeze

    # Placeholder named as the output for an unknown character.
    UNKNOWN_CHAR_OUTPUT = "☠".freeze
  end
end
data/lib/api/debug.rb ADDED
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
module Glaemscribe
  module API
    # Minimal switchable logger: messages are written to standard output
    # only while debugging has been turned on.
    module Debug
      class << self
        # Turns debug logging on (truthy value) or off (nil / false).
        # The flag is stored on the module itself, so it is shared by all callers.
        def enabled=(b)
          @debug_mode = b
        end

        # Prints +s+ with +puts+ when debug logging is on; does nothing otherwise.
        # Always returns nil.
        def log(s)
          return unless @debug_mode

          puts s
        end
      end
    end
  end
end
data/lib/api/eval.rb ADDED
@@ -0,0 +1,268 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API
    # A small recursive-descent evaluator for boolean / arithmetic expressions
    # such as "option == OPTION && count > 2".
    #
    # Supported syntax (loosest to tightest binding):
    #   ||   &&   <= < >= > == !=   + -   * / %   unary - and !   ( ... )
    # Operands are numbers, quoted strings, true / false / nil, or names
    # looked up in the variables hash given to Parser#parse.
    module Eval

      # Base class of every error raised by this evaluator.
      class IfEvalError < StandardError
      end

      # Raised by Lexer#advance when the remaining input starts with nothing
      # the lexer recognises.
      class UnknownToken < IfEvalError
      end

      # Raised by Parser when the token stream or a constant cannot be
      # understood. Note that inside this namespace the name shadows Ruby's
      # top-level ::SyntaxError.
      class SyntaxError < IfEvalError
      end

      # A lexical token. Instances stored in Lexer::EXP_TOKENS are templates
      # (name + matching expression); the lexer clones them to attach the
      # text that was actually matched.
      class Token
        attr_reader :name, :expression
        attr_accessor :value

        # name       - String identifying the token kind (e.g. "bool_or").
        # expression - String (literal match) or Regexp (pattern match).
        def initialize(name, expression)
          @name = name
          @expression = expression
          @value = nil
        end

        # True when this token is matched through a Regexp rather than a
        # literal string.
        def regexp?
          @expression.is_a? Regexp
        end

        # Returns a copy of this token carrying +value+ as matched text.
        def clone(value = nil)
          copy = super()
          copy.value = value
          copy
        end
      end

      # Splits an expression string into tokens, one per call to #advance.
      class Lexer
        attr_reader :exp, :token_chain

        # Order matters: the first template that matches wins, so longer
        # operators ("<=", "!=") are listed before their prefixes ("<", "!").
        # Patterns are anchored with \A (start of string). The previous ^
        # anchor matches at the start of any line, which on multi-line input
        # let a token match further down and silently dropped the text
        # before it.
        EXP_TOKENS = [
          Token.new("bool_or", "||"),
          Token.new("bool_and", "&&"),
          Token.new("cond_inf_eq", "<="),
          Token.new("cond_inf", "<"),
          Token.new("cond_sup_eq", ">="),
          Token.new("cond_sup", ">"),
          Token.new("cond_eq", "=="),
          Token.new("cond_not_eq", "!="),
          Token.new("add_plus", "+"),
          Token.new("add_minus", "-"),
          Token.new("mult_times", "*"),
          Token.new("mult_div", "/"),
          Token.new("mult_modulo", "%"),
          Token.new("prim_not", "!"),
          Token.new("prim_lparen", "("),
          Token.new("prim_rparen", ")"),
          Token.new("prim_string", /\A'[^']*'/),
          Token.new("prim_string", /\A"[^"]*"/),
          Token.new("prim_const", /\A[a-zA-Z0-9_.]+/)
        ].freeze

        # Template for the token returned once the input is exhausted.
        TOKEN_END = Token.new("prim_end", "")

        # exp - the expression String to tokenize. It is never modified.
        def initialize(exp)
          @exp = exp
          @token_chain = []
          @retain_last = false
        end

        # Pushes the last token back: the next #advance returns it again.
        def uneat
          @retain_last = true
        end

        # Returns the next Token, or the pushed-back one after #uneat.
        # Every newly read token is also appended to #token_chain.
        #
        # Raises UnknownToken when the remaining input matches no template.
        def advance
          # Non-destructive strip: the caller's string must not be mutated
          # (and may be frozen).
          @exp = @exp.strip

          if @retain_last
            @retain_last = false
            return @token_chain.last
          end

          return emit(TOKEN_END, "") if @exp == TOKEN_END.expression

          EXP_TOKENS.each do |token|
            if token.regexp?
              match = token.expression.match(@exp)
              next unless match

              @exp = match.post_match # Eat the token
              return emit(token, match.to_s)
            elsif @exp.start_with?(token.expression)
              @exp = @exp[token.expression.length..-1] # Eat the token
              return emit(token, token.expression)
            end
          end

          raise UnknownToken, "Unknown token at: #{@exp}"
        end

        private

        # Clones +template+ with the matched text, records it, returns it.
        def emit(template, text)
          token = template.clone(text)
          @token_chain << token
          token
        end
      end

      # Evaluates an expression while parsing it. Each explore_* method
      # handles one precedence level, reads one token too many to discover
      # where its level ends, and hands that token back with Lexer#uneat.
      class Parser
        # Evaluates +exp+ and returns the resulting Ruby value.
        #
        # exp  - the expression String.
        # vars - Hash of variable names (String or Symbol) to values.
        #
        # Raises UnknownToken or SyntaxError (both IfEvalError) on bad input.
        # Tokens left over after a complete expression are not checked:
        # evaluation stops at the first token that cannot continue it.
        def parse(exp, vars)
          @lexer = Lexer.new(exp)
          @vars = {}
          vars.each { |k, v| @vars[k.to_s] = v } # Cast symbols
          parse_top_level
        end

        # Entry point of the grammar, also used for parenthesised groups.
        def parse_top_level
          explore_bool
        end

        # Level "||". Binds looser than "&&", as in Ruby, so that
        # "a && b || c" is "(a && b) || c". Previously a falsy left operand
        # of "&&" swallowed the whole rest of the chain, including a
        # following "|| c". The right operand is always parsed and evaluated,
        # even when its value is not needed.
        def explore_bool
          v = explore_and
          loop do
            break unless @lexer.advance.name == 'bool_or'

            rhs = explore_and
            v ||= rhs
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level "&&". The right operand is always parsed and evaluated.
        def explore_and
          v = explore_compare
          loop do
            break unless @lexer.advance.name == 'bool_and'

            rhs = explore_compare
            v &&= rhs
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level of comparison operators, applied left to right.
        def explore_compare
          v = explore_add
          loop do
            case @lexer.advance.name
            when 'cond_inf_eq' then v = (v <= explore_add)
            when 'cond_inf'    then v = (v <  explore_add)
            when 'cond_sup_eq' then v = (v >= explore_add)
            when 'cond_sup'    then v = (v >  explore_add)
            when 'cond_eq'     then v = (v == explore_add)
            when 'cond_not_eq' then v = (v != explore_add)
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level "+" and "-".
        def explore_add
          v = explore_mult
          loop do
            case @lexer.advance.name
            when 'add_plus'  then v += explore_mult
            when 'add_minus' then v -= explore_mult
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Level "*", "/" and "%".
        def explore_mult
          v = explore_primary
          loop do
            case @lexer.advance.name
            when 'mult_times'  then v *= explore_primary
            when 'mult_div'    then v /= explore_primary
            when 'mult_modulo' then v %= explore_primary
            else break
            end
          end
          @lexer.uneat # Keep the unused token for the higher level
          v
        end

        # Primary expressions: constants, quoted strings, unary "-" / "!"
        # and parenthesised sub-expressions.
        #
        # Raises SyntaxError on any other token or a missing ")".
        def explore_primary
          token = @lexer.advance
          case token.name
          when 'prim_const', 'prim_string'
            # Quoted literals are lexed as 'prim_string'; they used to fall
            # into the else branch and were always rejected.
            cast_constant(token.value)
          when 'add_minus' # Allow the use of - as primary token for negative numbers
            -explore_primary
          when 'prim_not' # Allow the use of ! for booleans
            !explore_primary
          when 'prim_lparen'
            v = parse_top_level
            rtoken = @lexer.advance
            raise SyntaxError.new("Missing right parenthesis.") if rtoken.name != 'prim_rparen'
            v
          else
            raise SyntaxError.new("Cannot understand: #{token.value}.")
          end
        end

        # Returns the Float parsed from +const+ (always truthy), or false
        # when +const+ is not a valid float literal.
        def constant_is_float?(const)
          Float(const)
        rescue ArgumentError, TypeError
          false
        end

        # Returns the Integer parsed from +const+ in base 10 (always truthy),
        # or false when +const+ is not a decimal integer literal.
        def constant_is_int?(const)
          Integer(const, 10)
        rescue ArgumentError, TypeError
          false
        end

        # True when +const+ is wrapped in a matching pair of single or
        # double quotes.
        def constant_is_string?(const)
          return false if const.length < 2

          first = const[0]
          last = const[-1]
          first == last && (last == "'" || last == '"')
        end

        # Converts the text of a constant token into a Ruby value: Integer,
        # Float, String (quotes removed), true, false, nil, or the value of
        # the variable of that name.
        #
        # The number returned is the one produced by the validating
        # conversion itself. Validating with Integer() / Float() and then
        # converting with to_i / to_f disagreed on prefixed literals:
        # "0x10" was accepted and turned into 0.
        #
        # Raises SyntaxError for an unknown name. A variable whose value is
        # nil counts as unknown.
        def cast_constant(const)
          if (int = constant_is_int?(const))
            int
          elsif (float = constant_is_float?(const))
            float
          elsif (quoted = const.match(/\A'(.*)'\z/m) || const.match(/\A"(.*)"\z/m))
            quoted[1]
          elsif const == 'true'
            true
          elsif const == 'false'
            false
          elsif const == 'nil'
            nil
          elsif !@vars[const].nil?
            @vars[const]
          else
            raise SyntaxError.new("Cannot understand constant '#{const}'.")
          end
        end
      end
    end
  end
end
261
+
262
+ =begin
263
+ l = Glaemscribe::API::Eval::Parser.new
264
+ puts l.parse("2+2+2", {})
265
+ puts l.parse("1>2 && 2<3",{})
266
+ puts l.parse("option == OPTION",{:option => 2, :OPTION => 2})
267
+ puts l.parse("option == (OPTION && false)",{:option => 2, :OPTION => 2})
268
+ =end
@@ -0,0 +1,113 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+ #
23
+ # A Fragment is a sequence of equivalences
24
+ # For example h(a,ä)(i,ï) represents the four combinations:
25
+ # hai, haï, häi, häï
26
+
27
module Glaemscribe
  module API
    # A Fragment is a sequence of equivalences. Each equivalence is either a
    # plain run of tokens or a parenthesised, comma-separated list of
    # alternatives. For example "h(a,e)(i,o)" stands for the four
    # combinations h a i, h a o, h e i and h e o.
    class Fragment

      attr_reader :sheaf
      attr_reader :rule
      attr_reader :mode
      # Array of combinations, each an Array of token Strings. Empty when the
      # expression was rejected (see #initialize).
      attr_reader :combinations

      # Both predicates are delegated to the owning sheaf.
      def src?; @sheaf.src?; end
      def dst?; @sheaf.dst?; end

      EQUIVALENCE_SEPARATOR = ","

      EQUIVALENCE_RX_OUT = /(\(.*?\))/
      EQUIVALENCE_RX_IN = /\((.*?)\)/

      # Builds the fragment and computes all of its combinations.
      #
      # sheaf      - owner object; must respond to mode, rule, src? and dst?.
      # expression - fragment expression String, e.g. "h(a,e)(i,o)".
      #
      # Problems are not raised: a message is appended to rule.errors and the
      # fragment is left with no combinations. This happens when an
      # equivalence contains an empty member, or, for a destination fragment,
      # when a token is missing from one of mode.supported_charsets.
      def initialize(sheaf, expression)
        @sheaf = sheaf
        @mode = sheaf.mode
        @rule = sheaf.rule
        @expression = expression
        # Defined up front so that #combinations and #p stay usable after a
        # validation failure (it used to be left nil on the error paths).
        @combinations = []

        equivalences = parse_equivalences(expression)
        return unless equivalences
        return if dst? && !symbols_known?(equivalences)

        # Calculate all combinations for this fragment (productize the array of arrays)
        @combinations = equivalences.reduce { |acc, eq| acc.product(eq).map { |x, y| x + y } }
      end

      # Returns a multi-line String dump of the expression and of each
      # combination. Note: this shadows Kernel#p for instances of this class.
      def p
        lines = ["---- " + @expression]
        lines += @combinations.map { |c| "------ " + c.inspect }
        lines.map { |line| line + "\n" }.join
      end

      private

      # Splits the expression into an array of equivalences, each being an
      # array of members, each being an array of tokens,
      # e.g. "h(a,e)" => [[["h"]], [["a"], ["e"]]].
      # Returns nil after recording an error when a member is empty.
      def parse_equivalences(expression)
        chunks = expression.split(EQUIVALENCE_RX_OUT).map { |chunk| chunk.strip }

        equivalences = chunks.map do |chunk|
          group = EQUIVALENCE_RX_IN.match(chunk)
          # This equivalence has only one possibility
          next [chunk.split(/\s/)] unless group

          # The -1 limit keeps trailing empty members so they can be rejected.
          members = group[1].split(EQUIVALENCE_SEPARATOR, -1).map { |elt| elt.strip }
          if members.any? { |elt| elt.empty? }
            @rule.errors << "Null members are not allowed in equivalences!"
            return nil
          end
          members.map { |elt| elt.split(/\s/) }
        end

        equivalences.empty? ? [[[""]]] : equivalences
      end

      # Checks that every non-empty token is found in each charset supported
      # by the mode. Records an error and returns false at the first miss.
      def symbols_known?(equivalences)
        equivalences.flatten.each do |token|
          next if token.empty? # NULL case

          @mode.supported_charsets.each do |_charset_name, charset|
            next if charset[token]

            @rule.errors << "Symbol #{token} not found in charset '#{charset.name}'!"
            return false
          end
        end
        true
      end

    end
  end
end