glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# One concrete source => destination pairing.
#
# Instances only carry data: the source combination and the destination
# combination, both exposed through read-only accessors.
class SubRule
  attr_reader :src_combination
  attr_reader :dst_combination

  # rule            - accepted for interface compatibility; it is not stored.
  # src_combination - the source side of the pairing.
  # dst_combination - the destination side of the pairing.
  def initialize(rule, src_combination, dst_combination)
    @src_combination, @dst_combination = src_combination, dst_combination
  end

  # Returns a debug string of the form "<src.inspect> => <dst.inspect> \n".
  # Note that this shadows Kernel#p on instances of this class.
  def p
    [@src_combination, @dst_combination].map(&:inspect).join(" => ") + " \n"
  end
end
39
+ end
40
+ end
@@ -0,0 +1,118 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Base class for a single pre/post processing operation.
#
# An operator is built from raw (unevaluated) arguments. #finalize turns
# them into the effective arguments exposed by #args, and subclasses are
# expected to override #apply.
class PrePostProcessorOperator
  # Effective arguments; nil until #finalize has been called.
  attr_reader :args

  # raw_args - enumerable of raw arguments (nil entries are allowed).
  def initialize(raw_args)
    @raw_args = raw_args
  end

  # Resolves one raw argument.
  #
  # Returns nil for a nil argument. An argument that *begins* with
  # `\eval` followed by a whitespace character is handed (minus that prefix)
  # to Eval::Parser#parse together with trans_options, and the parser's
  # result is returned. Any other argument is returned unchanged.
  def eval_arg(arg, trans_options)
    return nil if arg.nil?

    # \A anchors at the very start of the argument. The previous `^` anchor
    # also matched at the start of any later line, so a multi-line argument
    # containing "\eval " further down was evaluated and its leading text
    # silently discarded.
    if arg =~ /\A\\eval\s/
      return Eval::Parser.new.parse(Regexp.last_match.post_match, trans_options)
    end

    arg
  end

  # Recomputes #args by resolving every raw argument against trans_options.
  def finalize(trans_options)
    @args = @raw_args.map { |arg| eval_arg(arg, trans_options) }
  end

  # Abstract: subclasses must override this method.
  #
  # Raises RuntimeError when called on the base class.
  def apply(l)
    raise "Pure virtual method, should be overloaded."
  end
end
53
+
54
# Holds a conditional tree of operators and applies the selected ones, in
# order, to a piece of text.
#
# The tree is rooted at #root_code_block. #finalize walks it for a given set
# of transcription options to decide which operators are active; #apply then
# chains those operators.
class TranscriptionPrePostProcessor
  # Root IfTree::CodeBlock of the operator tree.
  attr_reader :root_code_block

  # Operators selected by the last #finalize (empty before the first call).
  attr_reader :operators

  # mode - kept in @mode; not otherwise used by this class.
  def initialize(mode)
    @mode = mode
    @root_code_block = IfTree::CodeBlock.new
    # Start with an empty list so that #apply and #operators are usable
    # (as a no-op / empty array) before #finalize has run, instead of
    # failing on nil.
    @operators = []
  end

  # Walks code_block depth-first and appends the active operators to
  # @operators.
  #
  # Operator terms contribute all their operators. For any other term, its
  # if_conds are evaluated in order with Eval::Parser; only the child block
  # of the first condition whose expression evaluates to exactly `true` is
  # descended into, and the remaining conditions are not evaluated.
  def descend_if_tree(code_block, trans_options)
    code_block.terms.each do |term|
      if term.is_pre_post_processor_operators?
        term.operators.each { |operator| @operators << operator }
        next
      end

      chosen = term.if_conds.find do |if_cond|
        Eval::Parser.new.parse(if_cond.expression, trans_options) == true
      end
      descend_if_tree(chosen.child_code_block, trans_options) if chosen
    end
  end

  # Rebuilds the operator list for trans_options, then lets every selected
  # operator re-evaluate its own arguments.
  def finalize(trans_options)
    @operators = []
    descend_if_tree(@root_code_block, trans_options)
    @operators.each { |operator| operator.finalize(trans_options) }
  end

  # Feeds l through every selected operator consecutively and returns the
  # final result (l itself when no operator is selected).
  def apply(l)
    @operators.inject(l) { |text, operator| operator.apply(text) }
  end
end
104
+
105
# Marker subclasses: they add no behaviour of their own and only give the
# pre- and post-processing sides distinct class names.

# Base class for operators used on the pre-processing side.
class PreProcessorOperator < PrePostProcessorOperator; end

# Operator container for the pre-processing side.
class TranscriptionPreProcessor < TranscriptionPrePostProcessor; end

# Base class for operators used on the post-processing side.
class PostProcessorOperator < PrePostProcessorOperator; end

# Operator container for the post-processing side.
class TranscriptionPostProcessor < TranscriptionPrePostProcessor; end
116
+
117
+ end
118
+ end
@@ -0,0 +1,137 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# Core transcription step: turns input text into output glyph strings by
# matching it against a tree built from the sub rules of every rule group.
class TranscriptionProcessor
  # Hash of rule group name => rule group.
  attr_reader :rule_groups
  # The mode this processor belongs to; conflicts found in #finalize are
  # appended to mode.errors.
  attr_reader :mode

  # Optional list of output charset tokens used to render a space or tab.
  # When unset, a plain " " is emitted.
  attr_accessor :out_space

  def initialize(mode)
    @mode = mode
    @rule_groups = {}
  end

  # Registers one sub rule in the transcription tree: the joined source
  # combination is the path, the destination combination the replacement.
  # Requires #finalize to have created the tree.
  def add_subrule(sub_rule)
    @transcription_tree.add_subpath(sub_rule.src_combination.join(""), sub_rule.dst_combination)
  end

  # Rebuilds all derived state for the given transcription options:
  # finalizes every rule group, builds the input character => group table
  # and fills the transcription tree.
  def finalize(trans_options)
    @errors = []

    @transcription_tree = TranscriptionTreeNode.new(nil, nil)

    # The word boundary and word breaker markers both transcribe to an
    # empty token.
    [WORD_BOUNDARY, WORD_BREAKER].each do |marker|
      @transcription_tree.add_subpath(marker, [""])
    end

    rule_groups.each_value { |rule_group| rule_group.finalize(trans_options) }

    # Input charset: each character may belong to one group only. The first
    # group to claim a character keeps it; later claims are reported.
    @in_charset = {}
    rule_groups.each do |rgname, rule_group|
      rule_group.in_charset.each do |char, group|
        owner = @in_charset[char]
        if owner
          mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{owner.name}. Input charsets should not intersect between groups."
        else
          @in_charset[char] = group
        end
      end
    end

    # Transcription tree: one path per sub rule of every rule.
    rule_groups.each_value do |rule_group|
      rule_group.rules.each do |rule|
        rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
      end
    end
  end

  # Transcribes the text l and returns the resulting string.
  #
  # The text is cut into words at spaces/tabs (rendered with the out_space
  # string), at CR/LF (copied through unchanged) and wherever the rule group
  # of the current character differs from that of the pending word. Each
  # word is transcribed with #transcribe_word.
  def apply(l, out_charset)
    space = if @out_space
      @out_space.map { |token| out_charset[token].str }.join("")
    else
      " "
    end

    output = ""
    pending = ""
    active_group = nil

    l.split("").each do |char|
      if char == " " || char == "\t"
        output = output + transcribe_word(pending, out_charset) + space
        pending = ""
      elsif char == "\r" || char == "\n"
        output = output + transcribe_word(pending, out_charset) + char
        pending = ""
      else
        group = @in_charset[char]
        if group == active_group
          pending += char
        else
          # Group changed: flush the pending word and start a new one.
          output += transcribe_word(pending, out_charset)
          active_group = group
          pending = char
        end
      end
    end

    # Flush whatever is left after the last character.
    output + transcribe_word(pending, out_charset)
  end

  # Transcribes a single word.
  #
  # The word is wrapped in WORD_BOUNDARY markers and consumed piece by piece
  # through the transcription tree. Empty tokens are dropped,
  # UNKNOWN_CHAR_OUTPUT is emitted as is, and every other token is replaced
  # by the `str` of its out_charset entry.
  def transcribe_word(word, out_charset)
    remaining = WORD_BOUNDARY + word + WORD_BOUNDARY
    tokens = []

    until remaining.empty?
      replacement, consumed = @transcription_tree.transcribe(remaining)
      remaining = remaining[consumed..-1]
      tokens += replacement
    end

    tokens.inject("") do |out, token|
      if "" == token
        out
      elsif UNKNOWN_CHAR_OUTPUT == token
        out + UNKNOWN_CHAR_OUTPUT
      else
        out + out_charset[token].str
      end
    end
  end
end
136
+ end
137
+ end
@@ -0,0 +1,91 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
# A node of the character tree used for longest-match transcription.
#
# Each node stands for one character, may carry a replacement (making it
# "effective") and owns its child nodes in `siblings`, a hash keyed by the
# child's character. The root node is created with a nil character.
class TranscriptionTreeNode
  attr_accessor :character, :siblings, :replacement

  def initialize(character, replacement)
    @siblings = {}
    @character = character
    @replacement = replacement
  end

  # Debug helper: prints the number of children and, per child, its key and
  # whether it is effective.
  def _p
    puts "Node has #{@siblings.keys.count} siblings."
    @siblings.each_pair do |key, node|
      puts "#{key}, effective: #{node.effective?}"
    end
  end

  # Debug helper: renders a chain of nodes as "[ROOT, a, b]", using "ROOT"
  # for nodes without a character.
  def _pchain(chain)
    labels = chain.map { |node| node.character || "ROOT" }
    "[#{labels.join(", ")}]"
  end

  # True when this node carries a replacement.
  def effective?
    !@replacement.nil?
  end

  # Registers rep as the replacement for the path spelled by source,
  # creating intermediate nodes as needed. A nil or empty source is ignored.
  def add_subpath(source, rep)
    return if source.nil? || source.empty?

    head = source[0..0]
    child = (@siblings[head] ||= TranscriptionTreeNode.new(head, nil))

    if source.length > 1
      child.add_subpath(source[1..-1], rep)
    else
      # Last character of the path: this child becomes effective.
      child.replacement = rep
    end
  end

  # Finds the longest prefix of string that ends on an effective node.
  #
  # Returns two values: the replacement of that node and the number of
  # characters consumed. When no prefix matches, returns
  # [UNKNOWN_CHAR_OUTPUT] and 1. The visited nodes are appended to chain.
  def transcribe(string, chain=[])
    chain << self

    # Walk down as far as the input and the tree allow.
    node = self
    rest = string
    until rest.empty?
      child = node.siblings[rest[0..0]]
      break unless child

      chain << child
      node = child
      rest = rest[1..-1]
    end

    # Backtrack to the deepest effective node; after the pop, the chain
    # length equals the depth of the popped node, i.e. the characters used.
    while chain.count > 1
      candidate = chain.pop
      return candidate.replacement, chain.count if candidate.effective?
    end

    # Only the root is left: nothing matched.
    return [UNKNOWN_CHAR_OUTPUT], 1
  end
end
90
+ end
91
+ end
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+
24
# shellwords and fileutils ship with Ruby's standard library;
# unicode_utils is not part of it and is provided by a gem.
require "shellwords"
require "unicode_utils/downcase"
# The library file is named "fileutils" (lowercase). Requiring "FileUtils"
# only resolves on case-insensitive filesystems and raises LoadError
# elsewhere. `require` is idempotent, so no `defined?` guard is needed.
require "fileutils"

module Glaemscribe
  module API
    # Directory containing this file; every API source is loaded relative
    # to it, in the order listed below.
    API_PATH = File.dirname(__FILE__)

    require API_PATH + "/api/debug.rb"
    require API_PATH + "/api/constants.rb"

    require API_PATH + "/api/fragment.rb"
    require API_PATH + "/api/sheaf_chain_iterator.rb"
    require API_PATH + "/api/sheaf_chain.rb"
    require API_PATH + "/api/sheaf.rb"

    require API_PATH + "/api/rule.rb"
    require API_PATH + "/api/sub_rule.rb"
    require API_PATH + "/api/rule_group.rb"

    require API_PATH + "/api/eval.rb"
    require API_PATH + "/api/if_tree.rb"

    require API_PATH + "/api/transcription_tree_node.rb"

    require API_PATH + "/api/transcription_pre_post_processor.rb"
    require API_PATH + "/api/transcription_processor.rb"

    require API_PATH + "/api/charset.rb"
    require API_PATH + "/api/mode.rb"
    require API_PATH + "/api/option.rb"

    require API_PATH + "/api/resource_manager.rb"
    require API_PATH + "/api/glaeml.rb"
    require API_PATH + "/api/mode_parser.rb"
    require API_PATH + "/api/charset_parser.rb"

    # Built-in pre and post processor operators.
    require API_PATH + "/api/pre_processor/elvish_numbers.rb"
    require API_PATH + "/api/pre_processor/downcase.rb"
    require API_PATH + "/api/pre_processor/substitute.rb"
    require API_PATH + "/api/pre_processor/rxsubstitute.rb"
    require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
    require API_PATH + "/api/post_processor/reverse.rb"

  end
end