glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
data/lib/api/option.rb ADDED
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Describes one configurable option and resolves value names to values.
#
# The kind of option is inferred from +values+ at construction time:
# an empty collection yields a BOOL option, anything else an ENUM option
# whose +values+ are looked up by value name.
class Option
  attr_reader :name
  attr_reader :type
  attr_reader :default_value_name
  attr_reader :values

  # Identifiers for the two supported option kinds.
  class Type
    ENUM = "ENUM".freeze
    BOOL = "BOOL".freeze
  end

  # name               - stored as-is and exposed through #name.
  # default_value_name - value name used by #default_value.
  # values             - lookup table (indexed with []) from value name to
  #                      value; when empty the option is treated as BOOL.
  def initialize(name, default_value_name, values)
    @name               = name
    @default_value_name = default_value_name
    @type               = values.empty? ? Type::BOOL : Type::ENUM
    @values             = values
  end

  # Returns the value designated by the default value name.
  #
  # For BOOL options this is true when the default name is 'true' or the
  # literal true (the same spellings #value_for_value_name accepts), and
  # false otherwise. For ENUM options it is the entry of +values+ stored
  # under the default name.
  def default_value
    return @values[@default_value_name] unless @type == Type::BOOL

    @default_value_name == 'true' || @default_value_name == true
  end

  # Resolves +val_name+ to the corresponding value.
  #
  # BOOL options map 'true'/true to true and 'false'/false to false; any
  # other name gives nil. ENUM options return the entry of +values+ stored
  # under +val_name+.
  def value_for_value_name(val_name)
    return @values[val_name] unless @type == Type::BOOL

    case val_name
    when 'true', true   then true
    when 'false', false then false
    end
  end
end
63
+ end
64
+ end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Post-processing operator made available under the name "reverse".
class ReversePostProcessorOperator < PostProcessorOperator
  # Returns whatever +l+ answers to #reverse.
  def apply(l)
    l.reverse
  end
end

# Make the operator resolvable by name through the resource manager.
ResourceManager.register_post_processor_class("reverse", ReversePostProcessorOperator)
34
+
35
+ end
36
+ end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Pre-processing operator made available under the name "downcase".
class DowncasePreProcessorOperator < PreProcessorOperator
  # Returns +l+ lower-cased by UnicodeUtils.downcase.
  def apply(l)
    UnicodeUtils.downcase(l)
  end
end

# Make the operator resolvable by name through the resource manager.
ResourceManager.register_pre_processor_class("downcase", DowncasePreProcessorOperator)
33
+
34
+ end
35
+ end
@@ -0,0 +1,47 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Pre-processing operator made available under the name "elvish_numbers".
#
# Rewrites every run of decimal digits found in a line into another base.
class ElvishNumbersPreProcessorOperator < PreProcessorOperator

  # Converts each digit run of +l+ and returns the resulting string.
  #
  # args[0] - target base, converted with to_i; 12 when absent.
  # args[1] - whether the converted digits are reversed; "true" or true
  #           enable it, any other non-nil value disables it, and a
  #           missing (nil) argument enables it.
  def apply(l)
    radix_arg   = args[0]
    radix       = radix_arg ? radix_arg.to_i : 12

    reverse_arg = args[1]
    flip        = reverse_arg.nil? ? true : (reverse_arg == "true" || reverse_arg == true)

    l.gsub(/\d+/) do |digits|
      converted = digits.to_i.to_s(radix).upcase
      flip ? converted.reverse : converted
    end
  end

  # Make the operator resolvable by name through the resource manager.
  ResourceManager.register_pre_processor_class("elvish_numbers", ElvishNumbersPreProcessorOperator)

end
45
+ end
46
+ end
47
+
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+
24
+ module Glaemscribe
25
+ module API
26
+
27
# Pre-processing operator made available under the name "rxsubstitute".
class RxSubstitutePreProcessorOperator < PreProcessorOperator

  # Returns +l+ with every match of the regular expression built from
  # @args[0] replaced by @args[1].
  def apply(l)
    l.gsub(/#{@args[0]}/, @args[1])
  end

  # Make the operator resolvable by name through the resource manager.
  ResourceManager.register_pre_processor_class("rxsubstitute", RxSubstitutePreProcessorOperator)

end
38
+ end
39
+ end
40
+
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Pre-processing operator made available under the name "substitute".
class SubstitutePreProcessorOperator < PreProcessorOperator
  # Returns +l+ with every occurrence of @args[0] replaced by @args[1].
  def apply(l)
    l.gsub(@args[0], @args[1])
  end
end

# Make the operator resolvable by name through the resource manager.
ResourceManager.register_pre_processor_class("substitute", SubstitutePreProcessorOperator)
36
+
37
+ end
38
+ end
@@ -0,0 +1,138 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Pre-processing operator made available under the name
# "up_down_tehta_split".
#
# Splits each word of the input into vowel and consonant tokens and inserts
# a phantom vowel ("@") inside every consonant-vowel-consonant triple, so
# that the CVC pattern can no longer occur in the output.
class UpDownTehtaSplitPreProcessorOperator < PreProcessorOperator

  # Token lists parsed from the operator arguments (arrays of stripped
  # strings).
  attr_reader :vowel_list, :consonant_list

  # args[0] - comma-separated list of vowel tokens.
  # args[1] - comma-separated list of consonant tokens.
  def initialize(args)
    super(args)

    # Store the parsed lists in instance variables so that the attribute
    # readers declared above actually expose them.
    @vowel_list     = args[0].split(/,/).map { |s| s.strip }
    @consonant_list = args[1].split(/,/).map { |s| s.strip }

    @vowel_map      = {} # Recognize vowel tokens
    @consonant_map  = {} # Recognize consonant tokens
    @splitter_tree  = TranscriptionTreeNode.new(nil, nil) # Recognize tokens
    @word_split_map = {}
    # The word split map will help to recognize words
    # The splitter tree will help to split words into tokens

    @vowel_list.each do |v|
      @splitter_tree.add_subpath(v, v)
      @vowel_map[v] = v
    end

    @consonant_list.each do |c|
      @splitter_tree.add_subpath(c, c)
      @consonant_map[c] = c
    end

    # Every single character used by any token counts as a word character.
    all_letters = (@vowel_list + @consonant_list).join("").split(//).sort.uniq
    all_letters.each { |l| @word_split_map[l] = l }
  end

  # Classifies a token: "V" for a known vowel, "C" for a known consonant,
  # "X" for anything else.
  def type_of(token)
    return "V" if @vowel_map[token]
    return "C" if @consonant_map[token]

    "X"
  end

  # Tokenizes the word +w+ and returns it as a string with "@" inserted
  # between the consonant and the vowel of every CVC triple.
  def apply_to_word(w)
    tokens = []

    if w.strip.empty?
      tokens << w
    else
      until w.empty?
        r, len = @splitter_tree.transcribe(w)

        # Unrecognized input is passed through one character at a time.
        tokens << (r != [UNKNOWN_CHAR_OUTPUT] ? r : w[0..0])

        # NOTE(review): relies on transcribe always reporting a positive
        # consumed length, otherwise this loop would not terminate - confirm.
        w = w[len..-1]
      end
    end

    out = []

    # We replace the pattern CVC by CvVC where v is a phantom vowel.
    # This makes the pattern CVC not possible.
    i = 0
    while i < tokens.count - 2
      if type_of(tokens[i]) == "C" && type_of(tokens[i + 1]) == "V" && type_of(tokens[i + 2]) == "C"
        out << tokens[i] << "@" << tokens[i + 1]
        # Resume on the closing consonant: it may open the next triple.
        i += 2
      else
        out << tokens[i]
        i += 1
      end
    end

    # Add the remaining stuff
    out.concat(tokens[i..-1])

    out.join("")
  end

  # Applies #apply_to_word to every maximal run of word characters in
  # +content+; all other characters are copied through unchanged.
  def apply(content)
    ret  = "".dup
    word = "".dup

    content.split(//).each do |letter|
      if @word_split_map[letter]
        word << letter
      else
        ret << apply_to_word(word)
        ret << letter
        word = "".dup
      end
    end

    ret << apply_to_word(word)
    ret
  end

end

# Make the operator resolvable by name through the resource manager.
ResourceManager::register_pre_processor_class("up_down_tehta_split", UpDownTehtaSplitPreProcessorOperator)
136
+
137
+ end
138
+ end
@@ -0,0 +1,130 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+ module Glaemscribe
23
+ module API
24
# Central registry of the library resources.
#
# Keeps track of the modes and charsets that have been loaded from the
# glaemresources directory, and of the pre/post processor operator classes
# registered under a name.
module ResourceManager

  # Constants are frozen so that callers cannot alter them by accident;
  # ALL in particular is compared against in the load_* methods.
  MODE_PATH     = (File.dirname(__FILE__) + "/../../glaemresources/modes/").freeze
  MODE_EXT      = "glaem".freeze

  CHARSET_PATH  = (File.dirname(__FILE__) + "/../../glaemresources/charsets/").freeze
  CHARSET_EXT   = "cst".freeze

  # Sentinel meaning "every available resource".
  ALL           = ["*"].freeze

  @loaded_modes                     = {}
  @loaded_charsets                  = {}

  @pre_processor_operator_classes   = {}
  @post_processor_operator_classes  = {}

  # Names of all mode files found in MODE_PATH (file names without
  # extension).
  def self.available_mode_names
    Dir.glob(MODE_PATH + "*.#{MODE_EXT}").map { |mode_file| mode_name_from_file_path(mode_file) }
  end

  # Hash of the modes loaded so far, keyed by mode name.
  def self.loaded_modes
    @loaded_modes
  end

  # Hash of the charsets loaded so far, keyed by charset name.
  def self.loaded_charsets
    @loaded_charsets
  end

  # Associates +operator_class+ with +operator_name+ for pre-processing.
  def self.register_pre_processor_class(operator_name, operator_class)
    @pre_processor_operator_classes[operator_name] = operator_class
  end

  # Associates +operator_class+ with +operator_name+ for post-processing.
  def self.register_post_processor_class(operator_name, operator_class)
    @post_processor_operator_classes[operator_name] = operator_class
  end

  # Pre-processor class registered under +operator_name+, or nil.
  def self.class_for_pre_processor_operator_name(operator_name)
    @pre_processor_operator_classes[operator_name]
  end

  # Post-processor class registered under +operator_name+, or nil.
  def self.class_for_post_processor_operator_name(operator_name)
    @post_processor_operator_classes[operator_name]
  end

  # Debugging aid: prints both operator registries to standard output.
  def self.p
    puts @pre_processor_operator_classes.inspect
    puts @post_processor_operator_classes.inspect
  end

  # Mode name for a mode file: its base name without extension.
  def self.mode_name_from_file_path(file_path)
    File.basename(file_path, ".*")
  end

  # Charset name for a charset file: its base name without extension.
  def self.charset_name_from_file_path(file_path)
    File.basename(file_path, ".*")
  end

  # Parses the mode files found in MODE_PATH and records them in
  # loaded_modes.
  #
  # which_ones - a mode name, an array of mode names, or ALL (default) to
  #              load every mode file.
  def self.load_modes(which_ones = ALL)

    which_ones = [which_ones] if which_ones.is_a?(String)

    Dir.glob(MODE_PATH + "*.#{MODE_EXT}") { |mode_file|

      mode_name = mode_name_from_file_path(mode_file)

      next if which_ones != ALL && !which_ones.include?(mode_name)
      next if @loaded_modes.include?(mode_name) # Don't load a mode twice

      API::Debug::log("*" * 20)
      API::Debug::log("Parsing Mode : #{mode_name}")
      API::Debug::log("*" * 20)

      mode = API::ModeParser.new().parse(mode_file)
      @loaded_modes[mode.name] = mode if mode
    }
  end

  # Parses the charset files found in CHARSET_PATH and records them in
  # loaded_charsets.
  #
  # which_ones - a charset name, an array of charset names, or ALL
  #              (default) to load every charset file.
  def self.load_charsets(which_ones = ALL)

    which_ones = [which_ones] if which_ones.is_a?(String)

    Dir.glob(CHARSET_PATH + "*.#{CHARSET_EXT}") { |charset_file|

      charset_name = charset_name_from_file_path(charset_file)

      next if which_ones != ALL && !which_ones.include?(charset_name)
      next if @loaded_charsets.include?(charset_name) # Don't load a charset twice

      API::Debug::log("*" * 20)
      API::Debug::log("Parsing Charset : #{charset_name}")
      API::Debug::log("*" * 20)

      charset = API::CharsetParser.new().parse(charset_file)

      @loaded_charsets[charset.name] = charset if charset
    }
  end

  # Loaded charset registered under +name+, or nil when not loaded.
  def self.charset(name)
    @loaded_charsets[name]
  end

end
129
+ end
130
+ end