glaemscribe 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
data/lib/api/sub_rule.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# A single source => destination mapping.
#
# Only the two combinations are kept. TranscriptionProcessor#add_subrule joins
# +src_combination+ into a tree path and stores +dst_combination+ as the
# replacement for that path.
class SubRule

  attr_reader :src_combination, :dst_combination

  # rule            - accepted for the caller's convenience; not stored here.
  # src_combination - source side of the mapping.
  # dst_combination - destination side of the mapping.
  def initialize(rule, src_combination, dst_combination)
    @src_combination, @dst_combination = src_combination, dst_combination
  end

  # Returns a one-line debug description of the mapping (it does not print).
  def p
    @src_combination.inspect + " => " + @dst_combination.inspect + " \n"
  end

end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
# Base class for a single pre/post-processing operator.
#
# An operator is created with its raw (unevaluated) arguments. #finalize
# turns them into the effective +args+ for a given set of transcription
# options, and subclasses are expected to override #apply.
class PrePostProcessorOperator
  # Evaluated arguments; nil until #finalize has been called.
  attr_reader :args

  # raw_args - enumerable of argument values as written by the caller.
  def initialize(raw_args)
    @raw_args = raw_args
  end

  # Resolves one raw argument.
  #
  # nil stays nil. An argument in which a line starts with "\eval" followed
  # by a whitespace character is evaluated: the text after that marker is
  # handed to Eval::Parser together with +trans_options+ and the parser's
  # result is returned. Any other argument is returned unchanged.
  def eval_arg(arg, trans_options)
    return nil if arg.nil?
    return arg unless arg =~ /^\\eval\s/

    expression = Regexp.last_match.post_match
    Eval::Parser.new.parse(expression, trans_options)
  end

  # Rebuilds +args+ from the raw arguments for the given options.
  def finalize(trans_options)
    @args = []
    @raw_args.each do |raw|
      @args.push(eval_arg(raw, trans_options))
    end
  end

  # Placeholder: concrete operators must provide their own implementation.
  def apply(l)
    raise "Pure virtual method, should be overloaded."
  end
end
|
53
|
+
|
54
|
+
# Common machinery for the pre- and post-processing stages.
#
# Operators live in a tree of code blocks rooted at +root_code_block+. A term
# of a block either carries operators directly or carries a list of
# conditional branches. #finalize walks the tree for a given set of
# transcription options and collects the selected operators, in order, into
# +operators+; #apply then runs them one after another.
class TranscriptionPrePostProcessor
  attr_reader :root_code_block

  # Operators selected by the last #finalize; nil before that.
  attr_reader :operators

  def initialize(mode)
    @mode = mode
    @root_code_block = IfTree::CodeBlock.new
  end

  # Appends to @operators every operator reachable from +code_block+.
  #
  # For a conditional term only the first branch whose expression evaluates
  # to exactly +true+ is followed; the remaining branches are ignored.
  def descend_if_tree(code_block, trans_options)
    code_block.terms.each do |term|
      if term.is_pre_post_processor_operators?
        term.operators.each { |operator| @operators << operator }
        next
      end

      term.if_conds.each do |if_cond|
        evaluator = Eval::Parser.new
        next unless evaluator.parse(if_cond.expression, trans_options) == true

        descend_if_tree(if_cond.child_code_block, trans_options)
        break
      end
    end
  end

  # Selects the operators matching +trans_options+, then lets each selected
  # operator re-evaluate its own arguments against the same options.
  def finalize(trans_options)
    @operators = []
    descend_if_tree(@root_code_block, trans_options)
    @operators.each { |selected| selected.finalize(trans_options) }
  end

  # Feeds +l+ through every selected operator in order; each operator
  # receives the previous operator's result. Returns the final result.
  def apply(l)
    @operators.inject(l) { |text, operator| operator.apply(text) }
  end

end
|
104
|
+
|
105
|
+
# Operator flavour for the pre-processing side; adds nothing of its own to
# PrePostProcessorOperator.
class PreProcessorOperator < PrePostProcessorOperator; end
|
107
|
+
|
108
|
+
# Pre-processing stage; inherits all behaviour from
# TranscriptionPrePostProcessor without modification.
class TranscriptionPreProcessor < TranscriptionPrePostProcessor; end
|
110
|
+
|
111
|
+
# Operator flavour for the post-processing side; adds nothing of its own to
# PrePostProcessorOperator.
class PostProcessorOperator < PrePostProcessorOperator; end
|
113
|
+
|
114
|
+
# Post-processing stage; inherits all behaviour from
# TranscriptionPrePostProcessor without modification.
class TranscriptionPostProcessor < TranscriptionPrePostProcessor; end
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# Core transcription stage: turns a line of input text into output text by
# looking words up in a tree built from the sub-rules of every rule group.
#
# Usage order: fill +rule_groups+, call #finalize, then call #apply as often
# as needed. #finalize must run first because it builds the tree and the
# input charset that #apply and #add_subrule rely on.
class TranscriptionProcessor

  # Hash of rule groups, keyed by group name.
  attr_reader :rule_groups
  attr_reader :mode

  # Optional list of output-charset tokens emitted in place of a space/tab.
  # When nil, a plain " " is emitted.
  attr_accessor :out_space

  def initialize(mode)
    @mode = mode
    @rule_groups = {}
  end

  # Registers one sub-rule in the transcription tree: the joined source
  # combination is the path, the destination combination the replacement.
  def add_subrule(sub_rule)
    path = sub_rule.src_combination.join("")
    @transcription_tree.add_subpath(path, sub_rule.dst_combination)
  end

  # (Re)builds all derived state for the given transcription options:
  # finalizes every rule group, merges their input charsets (reporting
  # overlaps in +mode.errors+) and fills the transcription tree.
  def finalize(trans_options)
    # NOTE(review): @errors is not read anywhere in this class; problems are
    # reported through mode.errors. Kept so instance state is unchanged.
    @errors = []

    @transcription_tree = TranscriptionTreeNode.new(nil, nil)

    # WORD_BOUNDARY and WORD_BREAKER are known to the tree and produce the
    # empty token, which #transcribe_word skips.
    @transcription_tree.add_subpath(WORD_BOUNDARY, [""])
    @transcription_tree.add_subpath(WORD_BREAKER, [""])

    rule_groups.each_value { |rg| rg.finalize(trans_options) }

    # Build the input charset: character => owning group. The first group
    # to claim a character keeps it; later claims are reported as errors.
    @in_charset = {}
    rule_groups.each do |rgname, rg|
      rg.in_charset.each do |char, group|
        group_for_char = @in_charset[char]
        if group_for_char
          mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
        else
          @in_charset[char] = group
        end
      end
    end

    # Build the transcription tree from every sub-rule of every group.
    rule_groups.each_value do |rg|
      rg.rules.each do |r|
        r.sub_rules.each { |sr| add_subrule(sr) }
      end
    end
  end

  # Transcribes the line +l+ and returns the resulting string.
  #
  # The line is cut into words at spaces, tabs, CR and LF, and additionally
  # whenever the owning group of the current character differs from the
  # group of the previous one. Spaces and tabs are replaced by the output
  # space string; CR and LF are copied through unchanged.
  #
  # out_charset - object answering [] with a token and returning something
  #               that responds to #str.
  def apply(l, out_charset)
    # Private, mutable buffers appended in place; the previous `+=` form
    # copied the whole accumulated output for every character.
    out = "".dup
    word = "".dup
    current_group = nil

    out_space_str = " "
    out_space_str = @out_space.map { |token| out_charset[token].str }.join("") if @out_space

    l.each_char do |c|
      case c
      when " ", "\t"
        out << transcribe_word(word, out_charset) << out_space_str
        word = "".dup
      when "\r", "\n"
        out << transcribe_word(word, out_charset) << c
        word = "".dup
      else
        c_group = @in_charset[c]
        if c_group == current_group
          word << c
        else
          # Group change: flush what was accumulated, start a new word.
          out << transcribe_word(word, out_charset)
          current_group = c_group
          word = c.dup
        end
      end
    end

    # Flush the trailing word (lines usually do not end with a separator).
    out << transcribe_word(word, out_charset)
  end

  # Transcribes a single word and returns the resulting string.
  #
  # The word is wrapped in WORD_BOUNDARY markers and consumed from the left:
  # each tree lookup yields a list of tokens plus the number of characters
  # it consumed. Empty tokens are dropped, UNKNOWN_CHAR_OUTPUT is emitted
  # verbatim, and every other token is resolved through +out_charset+.
  def transcribe_word(word, out_charset)
    tokens = []
    remaining = WORD_BOUNDARY + word + WORD_BOUNDARY
    while remaining.length != 0
      replacement, consumed = @transcription_tree.transcribe(remaining)
      remaining = remaining[consumed..-1]
      tokens.concat(replacement)
    end

    result = "".dup
    tokens.each do |token|
      case token
      when ""
        # Empty token (e.g. a word boundary): nothing to emit.
      when UNKNOWN_CHAR_OUTPUT
        result << UNKNOWN_CHAR_OUTPUT
      else
        result << out_charset[token].str
      end
    end
    result
  end

end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# Node of a character tree (trie) used to look up replacements.
#
# Every node stands for one character of a source path. +siblings+ maps the
# next character to the node below this one, and +replacement+ is non-nil
# only on nodes where a registered path ends.
class TranscriptionTreeNode
  attr_accessor :character, :siblings, :replacement

  def initialize(character, replacement)
    @character   = character
    @replacement = replacement
    @siblings    = {}
  end

  # Debug helper: prints the nodes directly below this one to stdout.
  def _p
    puts "Node has #{@siblings.keys.count} siblings."
    @siblings.each do |key, node|
      puts "#{key}, effective: #{node.effective?}"
    end
  end

  # Debug helper: renders a chain of nodes as "[ROOT, a, b]"; a node without
  # a character is shown as ROOT.
  def _pchain(chain)
    labels = chain.map { |node| node.character || "ROOT" }
    "[" + labels.join(", ") + "]"
  end

  # True when a registered path ends on this node.
  def effective?
    !@replacement.nil?
  end

  # Registers +rep+ as the replacement for the path +source+, creating the
  # intermediate nodes as needed. A nil or empty +source+ is ignored.
  def add_subpath(source, rep)
    return if source.nil? || source.empty?

    head = source[0..0]
    tail = source[1..-1]
    child = (@siblings[head] ||= TranscriptionTreeNode.new(head, nil))

    if tail.empty?
      # The path ends here: this child becomes effective.
      child.replacement = rep
    else
      child.add_subpath(tail, rep)
    end
  end

  # Finds the longest registered path that is a prefix of +string+.
  #
  # Returns two values: the replacement and the number of characters
  # consumed. When no prefix matches, returns [UNKNOWN_CHAR_OUTPUT] and 1 so
  # the caller always advances. +chain+ collects the nodes visited so far
  # and is meant for the recursion only.
  def transcribe(string, chain=[])
    chain << self

    unless string.empty?
      child = @siblings[string[0..0]]
      return child.transcribe(string[1..-1], chain) if child
    end

    # Deepest reachable node: walk back up until a node with a replacement
    # is found. After the pop, chain.count equals that node's depth, i.e.
    # the number of characters its path consumed.
    until chain.count <= 1
      candidate = chain.pop
      return candidate.replacement, chain.count if candidate.effective?
    end

    # Only the root is left: nothing matched.
    return [UNKNOWN_CHAR_OUTPUT], 1
  end
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/glaemscribe.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
|
24
|
+
# shellwords and fileutils ship with Ruby's standard library; unicode_utils is a separate gem.
|
25
|
+
require "shellwords"
|
26
|
+
require "unicode_utils/downcase"
|
27
|
+
# The standard-library feature is named "fileutils" (lowercase). The
# capitalised spelling only resolves on case-insensitive filesystems and
# raises LoadError elsewhere. require is idempotent, so no defined? guard
# is needed.
require "fileutils"
|
28
|
+
|
29
|
+
module Glaemscribe
  module API
    # Directory containing this file; every API source is loaded relative to it.
    API_PATH = File.dirname(__FILE__)

    # Sources are required one by one, in exactly this order.
    # NOTE(review): later files presumably rely on definitions from earlier
    # ones, so keep the order when adding entries.
    [
      "/api/debug.rb",
      "/api/constants.rb",

      "/api/fragment.rb",
      "/api/sheaf_chain_iterator.rb",
      "/api/sheaf_chain.rb",
      "/api/sheaf.rb",

      "/api/rule.rb",
      "/api/sub_rule.rb",
      "/api/rule_group.rb",

      "/api/eval.rb",
      "/api/if_tree.rb",

      "/api/transcription_tree_node.rb",

      "/api/transcription_pre_post_processor.rb",
      "/api/transcription_processor.rb",

      "/api/charset.rb",
      "/api/mode.rb",
      "/api/option.rb",

      "/api/resource_manager.rb",
      "/api/glaeml.rb",
      "/api/mode_parser.rb",
      "/api/charset_parser.rb",

      "/api/pre_processor/elvish_numbers.rb",
      "/api/pre_processor/downcase.rb",
      "/api/pre_processor/substitute.rb",
      "/api/pre_processor/rxsubstitute.rb",
      "/api/pre_processor/up_down_tehta_split.rb",
      "/api/post_processor/reverse.rb"
    ].each { |relative_path| require API_PATH + relative_path }

  end
end
|