glaemscribe 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# A single source-combination => destination-combination pairing.
#
# Both combinations are stored exactly as given and exposed read-only.
class SubRule

  attr_reader :src_combination
  attr_reader :dst_combination

  # rule            - accepted for interface compatibility; it is not stored.
  # src_combination - the source side of the pairing.
  # dst_combination - the destination side of the pairing.
  def initialize(rule, src_combination, dst_combination)
    @src_combination, @dst_combination = src_combination, dst_combination
  end

  # Returns a one-line, newline-terminated description of the pairing,
  # built from the #inspect form of both sides.
  # NOTE: on instances of this class, this method shadows Kernel#p.
  def p
    @src_combination.inspect + " => " + @dst_combination.inspect + " \n"
  end

end
39
+ end
40
+ end
@@ -0,0 +1,118 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Base class for one pre/post-processing operation.
#
# An operator is built from raw arguments. #finalize resolves them into
# #args, evaluating any argument that begins with the "\eval" marker through
# Eval::Parser. Subclasses must override #apply.
class PrePostProcessorOperator
  # Marker announcing an expression to evaluate: a literal backslash, "eval",
  # then one whitespace character. It is anchored with \A (start of string)
  # rather than ^ (start of any line), so a multi-line argument that merely
  # contains "\eval" at the start of a later line is left untouched.
  EVAL_PREFIX = /\A\\eval\s/

  # Resolved arguments; nil until #finalize has been called.
  attr_reader :args

  # raw_args - the unevaluated arguments (must respond to #map).
  def initialize(raw_args)
    @raw_args = raw_args
  end

  # Resolves one raw argument.
  #
  # Returns nil for a nil argument, the result of Eval::Parser#parse for an
  # argument starting with the "\eval" marker (the text after the marker is
  # what gets parsed), and the argument itself otherwise.
  def eval_arg(arg, trans_options)
    return nil if arg.nil?
    return arg unless arg =~ EVAL_PREFIX

    Eval::Parser.new.parse(Regexp.last_match.post_match, trans_options)
  end

  # Rebuilds #args from the raw arguments for the given transcription options.
  def finalize(trans_options)
    @args = @raw_args.map { |arg| eval_arg(arg, trans_options) }
  end

  # Abstract: always raises a RuntimeError here; subclasses provide the
  # actual transformation of +l+.
  def apply(l)
    raise "Pure virtual method, should be overloaded."
  end
end
53
+
54
# Holds a tree of conditional code blocks and, once finalized, the flat list
# of operators selected from that tree, which #apply runs in order.
class TranscriptionPrePostProcessor
  # Root of the if-tree; callers populate its terms.
  attr_reader :root_code_block

  # Operators selected by the last #finalize (empty before the first one).
  attr_reader :operators

  def initialize(mode)
    @mode = mode
    @root_code_block = IfTree::CodeBlock.new
    # Start with an empty list so #apply and #operators are usable
    # (as a no-op / empty array) even before #finalize has run.
    @operators = []
  end

  # Walks +code_block+ and appends every selected operator to @operators.
  #
  # A term that holds operators contributes all of them. Any other term is
  # treated as a list of conditions: they are evaluated in order and only the
  # child block of the first one whose expression evaluates to exactly +true+
  # is descended into.
  def descend_if_tree(code_block, trans_options)
    code_block.terms.each do |term|
      if term.is_pre_post_processor_operators?
        term.operators.each { |operator| @operators << operator }
      else
        # Strict comparison with true is kept on purpose: a truthy non-boolean
        # result from the parser must not select the branch.
        chosen = term.if_conds.find do |if_cond|
          Eval::Parser.new.parse(if_cond.expression, trans_options) == true
        end
        descend_if_tree(chosen.child_code_block, trans_options) if chosen
      end
    end
  end

  # Recomputes the operator list for +trans_options+, then finalizes each
  # selected operator with the same options.
  def finalize(trans_options)
    @operators = []
    # Select operators depending on conditions
    descend_if_tree(@root_code_block, trans_options)
    # Reevaluate operator arguments
    @operators.each { |operator| operator.finalize(trans_options) }
  end

  # Apply all selected operators consecutively, feeding each one the
  # output of the previous one. Returns +l+ unchanged when there are none.
  def apply(l)
    @operators.reduce(l) { |text, operator| operator.apply(text) }
  end

end
104
+
105
# Marker subclass of PrePostProcessorOperator; it adds no behaviour of its own.
# NOTE(review): presumably the common base for the operators under
# pre_processor/ - confirm against those files.
class PreProcessorOperator < PrePostProcessorOperator; end
107
+
108
# Marker subclass of TranscriptionPrePostProcessor; it adds no behaviour of
# its own.
# NOTE(review): presumably the container run before transcription - confirm
# against the mode code that instantiates it.
class TranscriptionPreProcessor < TranscriptionPrePostProcessor; end
110
+
111
# Marker subclass of PrePostProcessorOperator; it adds no behaviour of its own.
# NOTE(review): presumably the common base for the operators under
# post_processor/ - confirm against those files.
class PostProcessorOperator < PrePostProcessorOperator; end
113
+
114
# Marker subclass of TranscriptionPrePostProcessor; it adds no behaviour of
# its own.
# NOTE(review): presumably the container run after transcription - confirm
# against the mode code that instantiates it.
class TranscriptionPostProcessor < TranscriptionPrePostProcessor; end
116
+
117
+ end
118
+ end
@@ -0,0 +1,137 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# Core transcription step: builds a lookup tree from the sub-rules of all rule
# groups and uses it to turn input text into output-charset strings.
#
# #finalize must be called before #apply: it creates both the transcription
# tree and the input-character => group table that #apply relies on.
class TranscriptionProcessor

  attr_reader :rule_groups, :mode

  # Optional list of output-charset tokens emitted in place of an input
  # space or tab; when unset a plain " " is emitted.
  attr_accessor :out_space

  def initialize(mode)
    @mode = mode
    @rule_groups = {}
  end

  # Registers one sub-rule in the transcription tree: the joined source
  # combination is the path, the destination combination is the replacement.
  def add_subrule(sub_rule)
    @transcription_tree.add_subpath(sub_rule.src_combination.join(""), sub_rule.dst_combination)
  end

  # Rebuilds all derived state for the given transcription options.
  def finalize(trans_options)
    @errors = []

    @transcription_tree = TranscriptionTreeNode.new(nil, nil)

    # WORD_BOUNDARY and WORD_BREAKER are known to the tree and produce
    # an empty token.
    [WORD_BOUNDARY, WORD_BREAKER].each do |marker|
      @transcription_tree.add_subpath(marker, [""])
    end

    rule_groups.each_value { |group| group.finalize(trans_options) }

    # Input charset: each input character maps to the group that owns it.
    # A character claimed by a second group is reported on the mode and the
    # first owner is kept.
    @in_charset = {}
    rule_groups.each do |group_name, group|
      group.in_charset.each do |char, owner|
        previous_owner = @in_charset[char]
        if previous_owner
          mode.errors << "Group #{group_name} uses input character #{char} which is also used by group #{previous_owner.name}. Input charsets should not intersect between groups."
        else
          @in_charset[char] = owner
        end
      end
    end

    # Transcription tree: one path per sub-rule of every rule of every group.
    rule_groups.each_value do |group|
      group.rules.each do |rule|
        rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
      end
    end
  end

  # Transcribes the text +l+ and returns the resulting string.
  #
  # The text is cut into words at spaces/tabs (replaced by the output space),
  # at CR/LF (copied through unchanged), and wherever the owning group of the
  # current character differs from that of the previous one. Each word is
  # transcribed separately by #transcribe_word.
  def apply(l, out_charset)
    space_out = @out_space ? @out_space.map { |token| out_charset[token].str }.join("") : " "

    output = "".dup
    pending = "".dup
    active_group = nil

    l.each_char do |c|
      case c
      when " ", "\t"
        output << transcribe_word(pending, out_charset) << space_out
        pending = "".dup
      when "\r", "\n"
        output << transcribe_word(pending, out_charset) << c
        pending = "".dup
      else
        group = @in_charset[c]
        if group == active_group
          pending << c
        else
          # Group change: flush what was accumulated and start a new word.
          output << transcribe_word(pending, out_charset)
          active_group = group
          pending = c.dup
        end
      end
    end

    # Flush whatever is left after the last character.
    output << transcribe_word(pending, out_charset)
    output
  end

  # Transcribes one word. The word is wrapped in WORD_BOUNDARY markers and
  # consumed from the left through the transcription tree; the collected
  # tokens are then rendered: empty tokens are dropped, UNKNOWN_CHAR_OUTPUT is
  # emitted as is, and every other token is looked up in +out_charset+.
  def transcribe_word(word, out_charset)
    remaining = WORD_BOUNDARY + word + WORD_BOUNDARY
    tokens = []
    until remaining.length == 0
      replacement, consumed = @transcription_tree.transcribe(remaining)
      remaining = remaining[consumed..-1]
      tokens.concat(replacement)
    end

    rendered = "".dup
    tokens.each do |token|
      case token
      when ""
        next
      when UNKNOWN_CHAR_OUTPUT
        rendered << UNKNOWN_CHAR_OUTPUT
      else
        rendered << out_charset[token].str
      end
    end
    rendered
  end

end
136
+ end
137
+ end
@@ -0,0 +1,91 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# Node of a character tree used for longest-match lookups.
#
# Every node carries the character that leads to it, an optional replacement
# (a node with a replacement is "effective") and, in +siblings+, its child
# nodes keyed by the next character (despite the attribute's name).
class TranscriptionTreeNode
  attr_accessor :character, :siblings, :replacement

  def initialize(character, replacement)
    @character, @replacement = character, replacement
    @siblings = {}
  end

  # Debug helper: prints the number of children and, for each one, its key and
  # whether it is effective.
  def _p
    puts "Node has #{@siblings.size} siblings."
    @siblings.each_pair do |key, node|
      puts "#{key}, effective: #{node.effective?}"
    end
  end

  # Debug helper: renders a chain of nodes as "[ROOT, a, b]"; a node without a
  # character is shown as ROOT.
  def _pchain(chain)
    labels = chain.map { |node| node.character || "ROOT" }
    "[" + labels.join(", ") + "]"
  end

  # True when this node carries a replacement.
  def effective?
    !@replacement.nil?
  end

  # Registers +rep+ as the replacement for the path +source+, creating the
  # intermediate nodes that do not exist yet. Does nothing (and returns nil)
  # for a nil or empty source.
  def add_subpath(source, rep)
    return if source.nil? || source.empty?

    leaf = source.each_char.inject(self) do |node, ch|
      node.siblings[ch] ||= TranscriptionTreeNode.new(ch, nil)
    end
    # The node reached by the last character is the effective one.
    leaf.replacement = rep
  end

  # Finds the longest registered path that is a prefix of +string+.
  #
  # Returns two values: the replacement and the number of characters it
  # consumes. When no effective node is found on the way, returns
  # [UNKNOWN_CHAR_OUTPUT] and 1.
  #
  # +chain+ collects the visited nodes, starting with this one.
  def transcribe(string, chain=[])
    chain << self

    # Follow the tree as far as the input allows.
    cursor = self
    string.each_char do |ch|
      cursor = cursor.siblings[ch]
      break unless cursor
      chain << cursor
    end

    # Walk back towards the start until an effective node is found. After the
    # pop, the chain length equals the depth of the popped node, i.e. the
    # number of characters consumed.
    until chain.count <= 1
      candidate = chain.pop
      return candidate.replacement, chain.count if candidate.effective?
    end

    # Only the starting node is left: nothing matched, return the "unknown char".
    return [UNKNOWN_CHAR_OUTPUT], 1
  end
end
90
+ end
91
+ end
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+
24
# External dependencies. "shellwords" and "fileutils" ship with Ruby's
# standard library; "unicode_utils" is a separate gem.
require "shellwords"
require "unicode_utils/downcase"
# The library file is named "fileutils" (all lowercase). Requiring "FileUtils"
# only resolves on case-insensitive filesystems and raises LoadError elsewhere.
require "fileutils" if !defined? FileUtils
28
+
29
module Glaemscribe
  module API
    # Directory containing this file; every API component is loaded from the
    # "api" directory next to it.
    API_PATH = File.dirname(__FILE__)

    # Components, listed in the order in which they are required.
    %w[
      debug
      constants

      fragment
      sheaf_chain_iterator
      sheaf_chain
      sheaf

      rule
      sub_rule
      rule_group

      eval
      if_tree

      transcription_tree_node

      transcription_pre_post_processor
      transcription_processor

      charset
      mode
      option

      resource_manager
      glaeml
      mode_parser
      charset_parser

      pre_processor/elvish_numbers
      pre_processor/downcase
      pre_processor/substitute
      pre_processor/rxsubstitute
      pre_processor/up_down_tehta_split
      post_processor/reverse
    ].each do |component|
      require API_PATH + "/api/" + component + ".rb"
    end

  end
end