glaemscribe 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
data/lib/api/option.rb ADDED
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # A named option with a default value.
    #
    # The option kind is derived from the +values+ hash handed to the
    # constructor: an empty hash yields a BOOL option, a non-empty hash
    # yields an ENUM option whose legal value names are the hash keys.
    class Option
      attr_reader :name
      attr_reader :type
      attr_reader :default_value_name
      attr_reader :values

      # The two option kinds, identified by plain strings.
      class Type
        ENUM = "ENUM"
        BOOL = "BOOL"
      end

      # name               - the option name.
      # default_value_name - name of the default value ('true'/'false' for a
      #                      BOOL option, one of the keys of +values+ for an
      #                      ENUM option).
      # values             - hash mapping value names to values; an empty
      #                      hash makes the option a BOOL.
      def initialize(name, default_value_name, values)
        @name               = name
        @default_value_name = default_value_name
        @type               = values.empty? ? Type::BOOL : Type::ENUM
        @values             = values
      end

      # Returns the default value of the option.
      #
      # For a BOOL option the result is always true or false: it is true
      # when the default value name is 'true' or the boolean true (the same
      # spellings value_for_value_name accepts), false otherwise.
      # For an ENUM option it is the entry of +values+ stored under the
      # default value name (nil when there is no such entry).
      def default_value
        if @type == Type::BOOL
          value_for_value_name(@default_value_name) == true
        else
          @values[@default_value_name]
        end
      end

      # Translates a value name into the corresponding value.
      #
      # BOOL options accept 'true'/true and 'false'/false and return nil for
      # anything else. ENUM options look the name up in +values+ and return
      # nil for unknown names.
      def value_for_value_name(val_name)
        return @values[val_name] unless @type == Type::BOOL

        return true  if val_name == 'true'  || val_name == true
        return false if val_name == 'false' || val_name == false
        nil
      end

    end
  end
end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Post-processor operator made available under the name "reverse".
    class ReversePostProcessorOperator < PostProcessorOperator

      # Returns whatever +l.reverse+ yields; +l+ itself is not modified by
      # this method.
      def apply(l)
        reversed = l.reverse
        reversed
      end
    end

    ResourceManager.register_post_processor_class("reverse", ReversePostProcessorOperator)

  end
end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Pre-processor operator made available under the name "downcase".
    class DowncasePreProcessorOperator < PreProcessorOperator

      # Lower-cases +l+ by delegating to UnicodeUtils.downcase and returns
      # the result.
      def apply(l)
        lowered = UnicodeUtils.downcase(l)
        lowered
      end
    end

    ResourceManager.register_pre_processor_class("downcase", DowncasePreProcessorOperator)

  end
end
@@ -0,0 +1,47 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Pre-processor operator made available under the name "elvish_numbers".
    #
    # Rewrites every run of decimal digits found in the text into another
    # numeric base, optionally reversing the order of the resulting digits.
    #
    # Operator arguments (read through +args+, which is expected to be
    # provided by the superclass -- not visible in this file):
    #   args[0] - target base; converted with to_i, defaults to 12 when
    #             absent. Integer#to_s raises ArgumentError for a radix
    #             outside 2..36.
    #   args[1] - digit reversal switch; "true" or true enables it, any other
    #             non-nil value disables it, defaults to enabled when nil.
    class ElvishNumbersPreProcessorOperator < PreProcessorOperator

      # Returns a copy of +l+ in which each digit run has been converted.
      def apply(l)
        base_arg    = args[0]
        reverse_arg = args[1]

        base    = base_arg ? base_arg.to_i : 12
        reverse = reverse_arg.nil? ? true : (reverse_arg == "true" || reverse_arg == true)

        l.gsub(/\d+/) do |digits|
          # Digits above 9 come out of to_s as lowercase letters; upcase them.
          converted = digits.to_i.to_s(base).upcase
          reverse ? converted.reverse : converted
        end
      end
    end

    # Registered after the class definition, like the other operators.
    ResourceManager::register_pre_processor_class("elvish_numbers", ElvishNumbersPreProcessorOperator)

  end
end
47
+
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+
24
module Glaemscribe
  module API

    # Pre-processor operator made available under the name "rxsubstitute".
    #
    # Performs a regular-expression substitution over the whole text.
    #   @args[0] - source of the regular expression (interpolated into a
    #              regexp literal, so an invalid pattern raises RegexpError
    #              when apply runs).
    #   @args[1] - replacement handed to String#gsub; back-references such
    #              as \1 are therefore honoured.
    # @args is presumably populated by the superclass constructor -- confirm
    # against PreProcessorOperator.
    class RxSubstitutePreProcessorOperator < PreProcessorOperator

      # Returns a copy of +l+ with every match of the pattern replaced.
      def apply(l)
        pattern     = /#{@args[0]}/
        replacement = @args[1]
        l.gsub(pattern, replacement)
      end
    end

    # Registered after the class definition, like the other operators.
    ResourceManager::register_pre_processor_class("rxsubstitute", RxSubstitutePreProcessorOperator)

  end
end
40
+
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Pre-processor operator made available under the name "substitute".
    #
    # Replaces every occurrence of @args[0] in the text with @args[1] by
    # passing both straight to String#gsub.
    # NOTE(review): assuming both arguments are Strings, the search term is
    # matched literally but gsub still interprets backslash sequences in the
    # replacement -- confirm this is intended.
    class SubstitutePreProcessorOperator < PreProcessorOperator

      # Returns a copy of +l+ with the substitution applied.
      def apply(l)
        l.gsub(@args[0], @args[1])
      end
    end

    ResourceManager.register_pre_processor_class("substitute", SubstitutePreProcessorOperator)

  end
end
@@ -0,0 +1,138 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Pre-processor operator made available under the name
    # "up_down_tehta_split".
    #
    # The text is cut into words, each word is cut into vowel / consonant
    # tokens, and wherever a consonant token is followed by a vowel token
    # which is itself followed by a consonant token, the marker "@" (a
    # phantom vowel) is inserted between the first consonant and the vowel:
    # C V C becomes C @ V C.
    #
    # Operator arguments:
    #   args[0] - comma separated list of vowel tokens.
    #   args[1] - comma separated list of consonant tokens.
    class UpDownTehtaSplitPreProcessorOperator < PreProcessorOperator

      # Arrays of the stripped vowel / consonant tokens parsed from the
      # operator arguments.
      attr_reader :vowel_list, :consonant_list

      def initialize(args)
        super(args)

        # Store the parsed lists in instance variables so that the readers
        # declared above actually expose them.
        @vowel_list     = args[0].split(/,/).map { |s| s.strip }
        @consonant_list = args[1].split(/,/).map { |s| s.strip }

        @vowel_map      = {} # Recognize vowel tokens
        @consonant_map  = {} # Recognize consonant tokens
        @splitter_tree  = TranscriptionTreeNode.new(nil, nil) # Recognize tokens
        @word_split_map = {}
        # The word split map will help to recognize words
        # The splitter tree will help to split words into tokens

        @vowel_list.each do |v|
          @splitter_tree.add_subpath(v, v)
          @vowel_map[v] = v
        end

        @consonant_list.each do |c|
          @splitter_tree.add_subpath(c, c)
          @consonant_map[c] = c
        end

        # Every single character used by any token is a "word" character.
        all_letters = (@vowel_list + @consonant_list).join("").split(//).sort.uniq
        all_letters.each { |l| @word_split_map[l] = l }
      end

      # Classifies a token: "V" for a known vowel token, "C" for a known
      # consonant token, "X" for anything else.
      def type_of(token)
        return "V" if @vowel_map[token]
        return "C" if @consonant_map[token]
        "X"
      end

      # Splits the word +w+ into tokens, inserts "@" into every C V C
      # sequence and returns the re-joined string.
      def apply_to_word(w)
        tokens = []

        if w.strip.empty?
          # Blank (or empty) word: keep it untouched.
          tokens << w
        else
          rest = w
          until rest.empty?
            # transcribe is defined elsewhere; it is used here as returning
            # the recognised token and the number of characters it covers.
            token, len = @splitter_tree.transcribe(rest)

            # Unrecognised input: keep the first character as its own token.
            tokens << (token != [UNKNOWN_CHAR_OUTPUT] ? token : rest[0..0])

            rest = rest[len..-1]
          end
        end

        res_modified = []

        # We replace the pattern CVC by CvVC where v is a phantom vowel.
        # This makes the pattern CVC not possible.
        i = 0
        while i < tokens.count - 2
          cvc = type_of(tokens[i])     == "C" &&
                type_of(tokens[i + 1]) == "V" &&
                type_of(tokens[i + 2]) == "C"

          if cvc
            res_modified << tokens[i] << "@" << tokens[i + 1]
            # Resume on the trailing consonant so that it can open the next
            # C V C window.
            i += 2
          else
            res_modified << tokens[i]
            i += 1
          end
        end

        # Add the remaining stuff
        res_modified.concat(tokens.drop(i))

        res_modified.join("")
      end

      # Applies the operator to a whole text.
      #
      # Consecutive characters known to the word split map are accumulated
      # into words and run through apply_to_word; every other character
      # ends the current word and is copied to the output unchanged.
      def apply(content)
        accumulated_word = ""
        ret = ""

        content.split(//).each do |letter|
          if @word_split_map[letter]
            accumulated_word += letter
          else
            ret += apply_to_word(accumulated_word)
            ret += letter
            accumulated_word = ""
          end
        end

        # Flush the last word.
        ret += apply_to_word(accumulated_word)
        ret
      end

    end

    ResourceManager::register_pre_processor_class("up_down_tehta_split", UpDownTehtaSplitPreProcessorOperator)

  end
end
@@ -0,0 +1,130 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
module Glaemscribe
  module API

    # Central registry of the library.
    #
    # Keeps track of the modes and charsets loaded from the bundled resource
    # directories, and of the pre / post processor operator classes
    # registered under a name.
    module ResourceManager

      # Directory (relative to this file) and extension of the mode files.
      MODE_PATH     = (File.dirname(__FILE__) + "/../../glaemresources/modes/").freeze
      MODE_EXT      = "glaem".freeze

      # Directory (relative to this file) and extension of the charset files.
      CHARSET_PATH  = (File.dirname(__FILE__) + "/../../glaemresources/charsets/").freeze
      CHARSET_EXT   = "cst".freeze

      # Selector meaning "every available resource".
      ALL           = ["*"].freeze

      @loaded_modes                     = {}
      @loaded_charsets                  = {}

      @pre_processor_operator_classes   = {}
      @post_processor_operator_classes  = {}

      # Names (file names without extension) of all mode files found in
      # MODE_PATH.
      def self.available_mode_names
        Dir.glob(MODE_PATH + "*.#{MODE_EXT}").map do |mode_file|
          mode_name_from_file_path(mode_file)
        end
      end

      # Hash of the modes loaded so far, keyed by mode name.
      def self.loaded_modes
        @loaded_modes
      end

      # Hash of the charsets loaded so far, keyed by charset name.
      def self.loaded_charsets
        @loaded_charsets
      end

      # Registers +operator_class+ as the pre-processor operator called
      # +operator_name+ (a later registration replaces an earlier one).
      def self.register_pre_processor_class(operator_name, operator_class)
        @pre_processor_operator_classes[operator_name] = operator_class
      end

      # Registers +operator_class+ as the post-processor operator called
      # +operator_name+ (a later registration replaces an earlier one).
      def self.register_post_processor_class(operator_name, operator_class)
        @post_processor_operator_classes[operator_name] = operator_class
      end

      # Class registered for the given pre-processor operator name, or nil.
      def self.class_for_pre_processor_operator_name(operator_name)
        @pre_processor_operator_classes[operator_name]
      end

      # Class registered for the given post-processor operator name, or nil.
      def self.class_for_post_processor_operator_name(operator_name)
        @post_processor_operator_classes[operator_name]
      end

      # Debug helper: prints both operator registries to standard output.
      def self.p
        puts @pre_processor_operator_classes.inspect
        puts @post_processor_operator_classes.inspect
      end

      # Mode name for a mode file: its base name without extension.
      def self.mode_name_from_file_path(file_path)
        File.basename(file_path, ".*")
      end

      # Charset name for a charset file: its base name without extension.
      def self.charset_name_from_file_path(file_path)
        File.basename(file_path, ".*")
      end

      # Parses and registers the mode files of MODE_PATH.
      #
      # which_ones - ALL (default) to load every mode, or a mode name, or an
      #              array of mode names, to restrict loading.
      #
      # NOTE(review): the "already loaded" check uses the file-derived name
      # while the registry is keyed by the parsed mode's own name; this
      # presumes both agree -- confirm against ModeParser.
      def self.load_modes(which_ones = ALL)
        which_ones = [which_ones] if which_ones.is_a?(String)

        Dir.glob(MODE_PATH + "*.#{MODE_EXT}") do |mode_file|
          mode_name = mode_name_from_file_path(mode_file)

          next if which_ones != ALL && !which_ones.include?(mode_name)
          next if @loaded_modes.key?(mode_name) # Don't load a mode twice

          API::Debug::log("*" * 20)
          API::Debug::log("Parsing Mode : #{mode_name}")
          API::Debug::log("*" * 20)

          mode = API::ModeParser.new().parse(mode_file)
          @loaded_modes[mode.name] = mode if mode
        end
      end

      # Parses and registers the charset files of CHARSET_PATH.
      #
      # which_ones - ALL (default) to load every charset, or a charset name,
      #              or an array of charset names, to restrict loading.
      #
      # NOTE(review): as in load_modes, the "already loaded" check uses the
      # file-derived name while the registry is keyed by the parsed
      # charset's own name -- confirm against CharsetParser.
      def self.load_charsets(which_ones = ALL)
        which_ones = [which_ones] if which_ones.is_a?(String)

        Dir.glob(CHARSET_PATH + "*.#{CHARSET_EXT}") do |charset_file|
          charset_name = charset_name_from_file_path(charset_file)

          next if which_ones != ALL && !which_ones.include?(charset_name)
          next if @loaded_charsets.key?(charset_name) # Don't load a charset twice

          API::Debug::log("*" * 20)
          API::Debug::log("Parsing Charset : #{charset_name}")
          API::Debug::log("*" * 20)

          charset = API::CharsetParser.new().parse(charset_file)
          @loaded_charsets[charset.name] = charset if charset
        end
      end

      # Loaded charset registered under +name+, or nil when there is none.
      def self.charset(name)
        @loaded_charsets[name]
      end

    end
  end
end