glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
data/lib/api/rule.rb ADDED
@@ -0,0 +1,99 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+ class Rule # A rule declared on one line of a rule group; finalize expands it into SubRule objects.
26
+
27
+ attr_accessor :line
28
+ attr_accessor :src_sheaf_chain, :dst_sheaf_chain
29
+ attr_reader :sub_rules
30
+ attr_reader :mode
31
+ attr_reader :errors # messages that finalize forwards to @mode.errors
32
+ # Stores the declaration line and the owning rule group, takes the mode from that group, and starts with no sub rules and no errors.
33
+ def initialize(line, rule_group)
34
+ @line = line
35
+ @rule_group = rule_group
36
+ @mode = @rule_group.mode
37
+ @sub_rules = []
38
+ @errors = []
39
+ end
40
+ # Expands the rule into sub rules: at each iteration step, every source combination is paired with the first destination combination. Any problem is reported to @mode.errors and stops the expansion.
41
+ def finalize(cross_schema)
42
+
43
+ if(@errors.any?) # errors already recorded on the rule: forward them to the mode and give up
44
+ @errors.each { |e|
45
+ @mode.errors << Glaeml::Error.new(@line, e)
46
+ }
47
+ return
48
+ end
49
+ # Only the destination iterator receives the cross schema.
50
+ srccounter = SheafChainIterator.new(@src_sheaf_chain)
51
+ dstcounter = SheafChainIterator.new(@dst_sheaf_chain, cross_schema)
52
+
53
+ if(srccounter.errors.any?)
54
+ srccounter.errors.each{ |e| @mode.errors << Glaeml::Error.new(@line, e) }
55
+ return
56
+ end
57
+
58
+ if(dstcounter.errors.any?)
59
+ dstcounter.errors.each{ |e| @mode.errors << Glaeml::Error.new(@line, e) }
60
+ return
61
+ end
62
+ # Both sides must expose the same prototype (see SheafChainIterator#prototype), otherwise they cannot be paired.
63
+ srcp = srccounter.prototype
64
+ dstp = dstcounter.prototype
65
+
66
+ if srcp != dstp
67
+ @mode.errors << Glaeml::Error.new(@line, "Source and destination are not compatible (#{srcp} vs #{dstp})")
68
+ return
69
+ end
70
+ # do-while: the body runs once before srccounter.iterate() is consulted for the first time.
71
+ begin
72
+
73
+ # All equivalent combinations ...
74
+ src_combinations = srccounter.combinations
75
+
76
+ # ... should be sent to one destination
77
+ dst_combination = dstcounter.combinations.first
78
+
79
+ src_combinations.each{ |src_combination|
80
+ @sub_rules << SubRule.new(self, src_combination, dst_combination)
81
+ }
82
+ # Advance the destination in step with the source; its return value is ignored.
83
+ dstcounter.iterate()
84
+ end while srccounter.iterate()
85
+
86
+ end
87
+ # Debug dump: a line of 30 '=' followed by the dump of each sub rule.
88
+ def p
89
+ ret = ("=" * 30) + "\n"
90
+ @sub_rules.each{ |sr|
91
+ ret += sr.p
92
+ }
93
+ ret
94
+ end
95
+
96
+ end
97
+ end
98
+ end
99
+
data/lib/api/rule_group.rb ADDED
@@ -0,0 +1,159 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+ class RuleGroup # A named group of rules belonging to a mode; finalize(trans_options) selects and builds the rules.
26
+ # Variable names are made of digits, uppercase letters and underscores, written between braces.
27
+ VAR_NAME_REGEXP = /{([0-9A-Z_]+)}/
28
+ VAR_DECL_REGEXP = /^\s*{([0-9A-Z_]+)}\s+===\s+(.+?)\s*$/ # {NAME} === value
29
+ RULE_REGEXP = /^\s*(.*?)\s+-->\s+(.+?)\s*$/ # source --> destination
30
+ CROSS_RULE_REGEXP = /^\s*(.*?)\s+-->\s+([\s0-9,]+)\s+-->\s+(.+?)\s*$/ # source --> cross schema (digits, commas, spaces) --> destination
31
+
32
+ attr_reader :root_code_block, :name, :mode, :in_charset, :rules
33
+ # @vars, @in_charset and @rules are not set here: they are (re)created by finalize.
34
+ def initialize(mode,name)
35
+ @name = name
36
+ @mode = mode
37
+ @root_code_block = IfTree::CodeBlock.new
38
+ end
39
+ # Registers or overwrites a variable. Relies on @vars, which only exists once finalize has started.
40
+ def add_var(var_name, value)
41
+ @vars[var_name] = value
42
+ end
43
+
44
+ # Replaces every {VAR} of string by its value. Returns the expanded string, or nil (after recording an error on the mode) as soon as a variable is unknown.
45
+ def apply_vars(line, string)
46
+ ret = string.gsub(VAR_NAME_REGEXP) { |cap_var|
47
+ rep = @vars[$1]
48
+ if !rep
49
+ @mode.errors << Glaeml::Error.new(line, "In expression: #{string}: failed to evaluate variable: #{cap_var}.")
50
+ return nil # leaves apply_vars itself, not only the gsub block
51
+ end
52
+ rep
53
+ }
54
+ ret
55
+ end
56
+ # Walks the if-tree: code lines are finalized in order; among the conditions of a term, only the first one evaluating to true is descended into.
57
+ def descend_if_tree(code_block, trans_options)
58
+ code_block.terms.each{ |term|
59
+ if(term.is_code_lines?)
60
+ term.code_lines.each{ |cl|
61
+ finalize_code_line(cl)
62
+ }
63
+ else
64
+ term.if_conds.each{ |if_cond|
65
+
66
+ if_eval = Eval::Parser.new()
67
+
68
+ begin
69
+ if(if_eval.parse(if_cond.expression,trans_options) == true)
70
+ descend_if_tree(if_cond.child_code_block, trans_options)
71
+ break
72
+ end
73
+ rescue IfEvalError => e # the failure is reported on the mode, then the next condition is tried
74
+ @mode.errors << Glaeml::Error.new(if_cond.line, "Failed to evaluate condition '#{if_cond.expression}' (#{e})")
75
+ end
76
+
77
+ }
78
+ end
79
+ }
80
+ end
81
+ # Builds a Rule from the two sides of a rule line (after variable expansion), finalizes it and appends it to the rules of the group.
82
+ def finalize_rule(line, match_exp, replacement_exp, cross_schema = nil)
83
+
84
+ match = apply_vars(line, match_exp)
85
+ replacement = apply_vars(line, replacement_exp)
86
+
87
+ return if !match || !replacement # Failed
88
+
89
+ rule = Rule.new(line, self)
90
+ rule.src_sheaf_chain = SheafChain.new(rule,match,true)
91
+ rule.dst_sheaf_chain = SheafChain.new(rule,replacement,false)
92
+
93
+ rule.finalize(cross_schema)
94
+ # The rule is appended even when its finalize reported errors to the mode.
95
+ self.rules << rule
96
+ end
97
+ # Interprets one code line: variable declaration, cross rule, plain rule or empty line; anything else is reported as an error on the mode.
98
+ def finalize_code_line(code_line)
99
+ begin
100
+
101
+ if code_line.expression =~ VAR_DECL_REGEXP
102
+
103
+ var_name = $1
104
+ var_value_ex = $2
105
+ var_value = apply_vars(code_line.line, var_value_ex)
106
+
107
+ if !var_value
108
+ @mode.errors << Glaeml::Error.new(code_line.line, "Thus, variable {#{var_name}} could not be declared.")
109
+ return
110
+ end
111
+
112
+ add_var(var_name,var_value)
113
+
114
+ elsif code_line.expression =~ CROSS_RULE_REGEXP # tested first: a cross rule line would also match RULE_REGEXP
115
+
116
+ match = $1
117
+ cross = $2
118
+ replacement = $3
119
+
120
+ finalize_rule(code_line.line, match, replacement, cross)
121
+
122
+ elsif code_line.expression =~ RULE_REGEXP
123
+
124
+ match = $1
125
+ replacement = $2
126
+
127
+ finalize_rule(code_line.line, match, replacement)
128
+
129
+ elsif code_line.expression.empty?
130
+ # puts "Empty"
131
+ else
132
+ @mode.errors << Glaeml::Error.new(code_line.line,"Cannot understand: #{code_line.expression}")
133
+ end
134
+ end
135
+ end
136
+ # Resets the variables, the input charset and the rules, then rebuilds them for the given transcription options.
137
+ def finalize(trans_options)
138
+ @vars = {}
139
+ @in_charset = {}
140
+ @rules = []
141
+ # Predefined variable: {NULL} expands to the empty string.
142
+ add_var("NULL","")
143
+
144
+ descend_if_tree(@root_code_block, trans_options)
145
+
146
+ # Now that we have selected our rules, create the in_charset of the rule_group
147
+ rules.each{ |r|
148
+ r.sub_rules.each { |sr|
149
+ sr.src_combination.join("").split(//).each{ |inchar|
150
+ # Add the character to the map of input characters
151
+ # Ignore '_' (bounds of word) and '|' (word breaker)
152
+ @in_charset[inchar] = self if inchar != WORD_BREAKER && inchar != WORD_BOUNDARY
153
+ }
154
+ }
155
+ }
156
+ end
157
+ end
158
+ end
159
+ end
data/lib/api/sheaf.rb ADDED
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+ #
23
+ # A Sheaf is a bundle of Fragments. They are used to factorize the writing process of rules, and thus represent parallel rules.
24
+ # For example [(a|ä),b,c] => [1,2,3] means that we send one sheaf to another, defining 4 rules:
25
+ # a => 1
26
+ # ä => 1
27
+ # b => 2
28
+ # c => 3
29
+
30
+ module Glaemscribe
31
+ module API
32
+ class Sheaf # One member of a sheaf chain: an expression split into Fragment objects.
33
+
34
+ attr_reader :fragments
35
+ attr_reader :sheaf_chain
36
+ attr_reader :mode
37
+ attr_reader :rule
38
+ # Separator between the fragment expressions of a sheaf expression.
39
+ SHEAF_SEPARATOR = "*"
40
+ # Whether the sheaf is on the source or destination side is delegated to the owning sheaf chain.
41
+ def src?; @sheaf_chain.src?; end
42
+ def dst?; @sheaf_chain.dst?; end
43
+
44
+ # Should pass a sheaf expression, e.g. : "h * s * t". Mode and rule are taken from the given sheaf chain.
45
+ def initialize(sheaf_chain, expression)
46
+
47
+ @sheaf_chain = sheaf_chain
48
+ @mode = sheaf_chain.mode
49
+ @rule = sheaf_chain.rule
50
+ @expression = expression
51
+
52
+ # Split members using "*" separator, KEEP NULL MEMBERS (this is legal)
53
+ fragment_exps = expression.split(SHEAF_SEPARATOR,-1).map{|fragment_exp| fragment_exp.strip }
54
+ fragment_exps = [""] if fragment_exps.empty? # For NULL
55
+
56
+ # Build the fragments inside
57
+ @fragments = fragment_exps.map{ |fragment_exp| Fragment.new(self, fragment_exp) }
58
+ end
59
+ # Debug dump: the sheaf expression followed by the dump of each fragment.
60
+ def p
61
+ ret = "-- " + @expression + "\n"
62
+ @fragments.each{ |l|
63
+ ret += l.p
64
+ }
65
+ ret
66
+ end
67
+
68
+ end
69
+ end
70
+ end
data/lib/api/sheaf_chain.rb ADDED
@@ -0,0 +1,86 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+ #
23
+ # A sheaf chain is a sequence of sheaves. e.g. :
24
+ #
25
+ # With a global rule of : src => res
26
+ # Where src = "[a,b,c][d,e,f]"
27
+ # and res = "[x,y,z][1,2,3]"
28
+ #
29
+ # The generated rules are a list of 9 parallel rules:
30
+ # ad => x1, ae => x2, af => x3
31
+ # bd => y1, be => y2, etc...
32
+ #
33
+ # Or, more complicated: "[m,(b|p)](h|y)[a,e]"
34
+ # Will generate the following equivalences:
35
+ # mha = mya
36
+ # mhe = mye
37
+ # bha = pha = bya = pya
38
+ # bhe = phe = bye = pye
39
+
40
+ module Glaemscribe
41
+ module API
42
+ class SheafChain # One side of a rule, parsed into a sequence of Sheaf objects.
43
+ # SHEAF_REGEXP_OUT is used to split an expression while keeping its [...] groups; SHEAF_REGEXP_IN captures the interior of one group.
44
+ SHEAF_REGEXP_IN = /\[(.*?)\]/
45
+ SHEAF_REGEXP_OUT = /(\[.*?\])/
46
+
47
+ attr_reader :is_src
48
+ attr_reader :sheaves
49
+ attr_reader :mode
50
+ attr_reader :rule
51
+
52
+ def src? ; is_src ; end
53
+ def dst? ; !is_src ; end
54
+
55
+ # Pass in one whole side of a rule (source or destination); is_src tells which side it is.
56
+ def initialize(rule, expression, is_src)
57
+ @rule = rule
58
+ @mode = rule.mode
59
+ @is_src = is_src
60
+ @expression = expression
61
+
62
+ # Split expression with '[...]' patterns. e.g. 'b[a*c*d]e' => [b, a*c*d, e]
63
+ sheaf_exps = expression.split(SHEAF_REGEXP_OUT).map{ |elt| elt.strip }.reject{ |elt| elt.empty? }
64
+ sheaf_exps = sheaf_exps.map { |sheaf_exp|
65
+ sheaf_exp =~ SHEAF_REGEXP_IN
66
+ sheaf_exp = $1 if $1 # Take the interior of the brackets if it was a [...] expression
67
+ sheaf_exp.strip
68
+ }
69
+ # An expression that yields no sheaf at all still produces a chain holding one empty sheaf.
70
+ @sheaves = sheaf_exps.map{ |sheaf_exp| Sheaf.new(self,sheaf_exp) }
71
+ @sheaves = [Sheaf.new(self,"")] if @sheaves.empty?
72
+ end
73
+ # Debug dump: a line of 30 '*', the chain expression, then the dump of each sheaf.
74
+ def p
75
+ ret = ("*" * 30)
76
+ ret += "\n"
77
+ ret += @expression + "\n"
78
+ @sheaves.each{ |s|
79
+ ret += s.p
80
+ }
81
+ ret
82
+ end
83
+
84
+ end
85
+ end
86
+ end
data/lib/api/sheaf_chain_iterator.rb ADDED
@@ -0,0 +1,108 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+ class SheafChainIterator # Steps through the fragment choices of a sheaf chain, with one counter per sheaf.
26
+
27
+ attr_accessor :sheaf_chain
28
+ attr_accessor :cross_map
29
+ attr_accessor :errors
30
+ # cross_schema, when given, is a comma-separated list of 1-based sheaf positions (e.g. "2,1"); problems with it are stored in errors.
31
+ # If a cross schema is passed, the prototype of the chain will be permutated
32
+ def initialize(sheaf_chain, cross_schema = nil)
33
+ @sheaf_chain = sheaf_chain
34
+ # Sizes contains the number of fragments/sheaf
35
+ @sizes = sheaf_chain.sheaves.map { |sheaf| sheaf.fragments.count }
36
+ # An array of counters, one for each sheaf, to increment on fragments
37
+ @iterators = Array.new(@sizes.count,0)
38
+
39
+ @errors = []
40
+
41
+ # Construct the identity array
42
+ identity_cross_array = []
43
+ sheaf_count = sheaf_chain.sheaves.count
44
+ sheaf_count.times{|i| identity_cross_array << i+1}
45
+
46
+ # Construct the cross array
47
+ if cross_schema
48
+ @cross_array = cross_schema.split(",").map{ |i| i.to_i }
49
+ ca_count = @cross_array.count
50
+ @errors << "#{sheaf_count} sheafs found in right predicate, but #{ca_count} elements in cross rule." if ca_count != sheaf_count
51
+ @errors << "Cross rule should contain each element of #{identity_cross_array} once and only once." if identity_cross_array != @cross_array.sort
52
+ else
53
+ @cross_array = identity_cross_array
54
+ end
55
+ end
56
+
57
+ # Calculate the prototype of the chain: the fragment counts of its sheaves taken in cross order, without the 1s, joined by "x" ("1" when nothing is left).
58
+ def prototype
59
+ res = @sizes.clone
60
+ res2 = @sizes.clone
61
+
62
+ res.count.times{ |i| res2[i] = res[@cross_array[i]-1] }
63
+ res = res2
64
+
65
+ # Remove all sheaves of size 1 (which are constant)
66
+ res.delete(1)
67
+
68
+ # Create a prototype string
69
+ res = res.join("x")
70
+ res = "1" if res.empty?
71
+ res
72
+ end
73
+ # Increments the counters in cross order, resetting a counter to 0 and carrying to the next one when it reaches its size. Returns true after a plain increment, false once every counter has wrapped.
74
+ def iterate
75
+ pos = 0
76
+ while pos < @sizes.count do
77
+ realpos = @cross_array[pos]-1
78
+ @iterators[realpos] += 1
79
+ if @iterators[realpos] >= @sizes[realpos]
80
+ @iterators[realpos] = 0
81
+ pos += 1
82
+ else
83
+ return true
84
+ end
85
+ end
86
+ # Wrapped!
87
+ return false
88
+ end
89
+
90
+ # Calculate all combinations for the chain at the current counters: the combinations of the selected fragment of each sheaf are added together (e1+e2), in sheaf order.
91
+ def combinations
92
+ resolved = []
93
+ @iterators.each_with_index{ |counter, index|
94
+ sheaf = sheaf_chain.sheaves[index]
95
+ fragment = sheaf.fragments[counter]
96
+
97
+ resolved << fragment.combinations
98
+ }
99
+ res = resolved[0]
100
+ (resolved.count-1).times { |i|
101
+ res = res.product(resolved[i+1]).map{|e1,e2| e1+e2}
102
+ }
103
+ res
104
+ end
105
+
106
+ end
107
+ end
108
+ end