glaemscribe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
data/lib/api/sub_rule.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# Pairs a source combination with the destination combination it maps to.
# Both values are stored untouched and exposed through readers.
class SubRule
  attr_reader :src_combination, :dst_combination

  # rule            - accepted to keep the constructor signature; it is not
  #                   stored or used by this class.
  # src_combination - value exposed by #src_combination.
  # dst_combination - value exposed by #dst_combination.
  def initialize(rule, src_combination, dst_combination)
    @src_combination, @dst_combination = src_combination, dst_combination
  end

  # Returns a one-line "source => destination" debug string built from the
  # #inspect form of both combinations, terminated by " \n".
  # NOTE(review): this public method shadows Kernel#p on SubRule instances.
  def p
    src = @src_combination.inspect
    dst = @dst_combination.inspect
    "#{src} => #{dst} \n"
  end
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
# Base class for one pre/post-processing operation.
#
# An operator is built from raw arguments; #finalize turns them into the
# effective arguments (#args) for a given set of transcription options, and
# #apply is expected to be overridden by subclasses.
class PrePostProcessorOperator
  # Effective arguments computed by the last call to #finalize
  # (nil until #finalize has been called).
  attr_reader :args

  # raw_args - enumerable of raw arguments; each one is either used verbatim
  #            or, when it starts with "\eval ", evaluated at finalize time.
  def initialize(raw_args)
    @raw_args = raw_args
  end

  # Resolves one raw argument.
  #
  # Returns nil for a nil argument. When the argument starts with the
  # literal prefix "\eval" followed by a whitespace character, the rest of
  # the argument is handed to Eval::Parser#parse together with trans_options
  # and the parse result is returned. Any other argument is returned as is.
  def eval_arg(arg, trans_options)
    return nil if arg.nil?

    # \A anchors at the very start of the argument. The previous ^ anchor
    # matched at the start of ANY line, so a multi-line argument containing
    # "\eval " on a later line was evaluated and its leading text dropped.
    return arg unless arg =~ /\A\\eval\s/

    Eval::Parser.new.parse(Regexp.last_match.post_match, trans_options)
  end

  # Recomputes #args from the raw arguments for the given options.
  def finalize(trans_options)
    @args = @raw_args.map { |arg| eval_arg(arg, trans_options) }
  end

  # Abstract hook: subclasses must override it. The base implementation
  # always raises a RuntimeError.
  def apply(l)
    raise "Pure virtual method, should be overloaded."
  end
end
|
53
|
+
|
54
|
+
# Holds a tree of conditional code blocks containing operators, selects the
# operators whose conditions hold for a given set of options (#finalize),
# and runs them one after another on a string (#apply).
class TranscriptionPrePostProcessor
  # Root IfTree::CodeBlock; terms are added to it from outside this class.
  attr_reader :root_code_block

  # Operators selected by the last #finalize, in application order.
  attr_reader :operators

  # mode - stored in @mode; not otherwise used inside this class.
  def initialize(mode)
    @mode = mode
    @root_code_block = IfTree::CodeBlock.new
    # Start with no operators so that #apply and #operators are usable
    # before #finalize has run (previously @operators was nil and #apply
    # failed with NoMethodError on nil).
    @operators = []
  end

  # Walks code_block's terms in order and appends the active operators to
  # @operators.
  #
  # A term answering true to is_pre_post_processor_operators? contributes
  # all of its operators. Any other term is treated as an if-term: its
  # conditions are evaluated in order and only the child block of the first
  # condition whose expression evaluates to exactly `true` is descended into.
  def descend_if_tree(code_block, trans_options)
    code_block.terms.each do |term|
      if term.is_pre_post_processor_operators?
        term.operators.each { |operator| @operators << operator }
      else
        # Strict `== true` is deliberate: a merely truthy result (e.g. a
        # number or a string) does not select the branch.
        selected = term.if_conds.find do |if_cond|
          Eval::Parser.new.parse(if_cond.expression, trans_options) == true
        end
        descend_if_tree(selected.child_code_block, trans_options) if selected
      end
    end
  end

  # Rebuilds the operator list for trans_options, then finalizes every
  # selected operator with the same options.
  def finalize(trans_options)
    @operators = []
    # Select operators depending on conditions
    descend_if_tree(@root_code_block, trans_options)
    # Reevaluate operator arguments
    @operators.each { |op| op.finalize(trans_options) }
  end

  # Applies all selected operators consecutively: each operator receives the
  # previous operator's result. Returns l unchanged when there is no
  # operator.
  def apply(l)
    @operators.inject(l) { |text, operator| operator.apply(text) }
  end
end
|
104
|
+
|
105
|
+
# Marker subclass of PrePostProcessorOperator; it adds no behaviour of its
# own. Presumably the base class of concrete pre-processor operators —
# confirm against the files under api/pre_processor.
class PreProcessorOperator < PrePostProcessorOperator; end
|
107
|
+
|
108
|
+
# Marker subclass of TranscriptionPrePostProcessor; it adds no behaviour of
# its own. Presumably used for the pre-processing stage of a mode — confirm
# against the mode implementation.
class TranscriptionPreProcessor < TranscriptionPrePostProcessor; end
|
110
|
+
|
111
|
+
# Marker subclass of PrePostProcessorOperator; it adds no behaviour of its
# own. Presumably the base class of concrete post-processor operators —
# confirm against the files under api/post_processor.
class PostProcessorOperator < PrePostProcessorOperator; end
|
113
|
+
|
114
|
+
# Marker subclass of TranscriptionPrePostProcessor; it adds no behaviour of
# its own. Presumably used for the post-processing stage of a mode —
# confirm against the mode implementation.
class TranscriptionPostProcessor < TranscriptionPrePostProcessor; end
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# Transcribes text using the rules of its rule groups.
#
# #finalize builds a lookup tree (TranscriptionTreeNode) from every sub rule
# of every rule group plus an input-character => group table; #apply then
# cuts a line into words and transcribes each word through that tree.
class TranscriptionProcessor
  # Hash of rule group name => rule group, filled from outside this class.
  attr_reader :rule_groups
  attr_reader :mode

  # Optional list of output-charset tokens used to render a space or tab.
  # When nil, a plain " " is emitted instead.
  attr_accessor :out_space

  def initialize(mode)
    @mode = mode
    @rule_groups = {}
  end

  # Registers one sub rule in the transcription tree: the joined source
  # combination is the path, the destination combination the replacement.
  # Requires #finalize to have created the tree.
  def add_subrule(sub_rule)
    path = sub_rule.src_combination.join("")
    @transcription_tree.add_subpath(path, sub_rule.dst_combination)
  end

  # Finalizes every rule group with trans_options, then rebuilds the input
  # charset table and the transcription tree from scratch.
  #
  # An input character claimed by more than one group is reported by
  # appending a message to mode.errors; the first group seen keeps it.
  def finalize(trans_options)
    # NOTE(review): @errors is assigned here but never read in this class;
    # conflicts are reported through mode.errors below.
    @errors = []

    @transcription_tree = TranscriptionTreeNode.new(nil, nil)

    # Add WORD_BOUNDARY and WORD_BREAKER in the tree
    @transcription_tree.add_subpath(WORD_BOUNDARY, [""])
    @transcription_tree.add_subpath(WORD_BREAKER, [""])

    rule_groups.each { |_rgname, rg| rg.finalize(trans_options) }

    # Build the input charset
    @in_charset = {}
    rule_groups.each do |rgname, rg|
      rg.in_charset.each do |char, group|
        group_for_char = @in_charset[char]
        if group_for_char
          mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
        else
          @in_charset[char] = group
        end
      end
    end

    # Build the transcription tree
    rule_groups.each do |_rgname, rg|
      rg.rules.each do |r|
        r.sub_rules.each { |sr| add_subrule(sr) }
      end
    end
  end

  # Transcribes the string l and returns the result as a new string.
  #
  # out_charset - object indexed by token whose entries respond to #str.
  #
  # Words end at a space or tab (rendered as the out_space tokens, or " "
  # when out_space is nil), at "\r" or "\n" (copied through unchanged), and
  # wherever the input-charset group of the characters changes.
  def apply(l, out_charset)
    # The output is built in place with <<: the previous += copied the
    # whole accumulated result on every character.
    ret = "".dup
    current_group = nil
    accumulated_word = "".dup

    out_space_str = " "
    out_space_str = @out_space.map { |token| out_charset[token].str }.join("") if @out_space

    l.each_char do |c|
      case c
      when " ", "\t"
        ret << transcribe_word(accumulated_word, out_charset) << out_space_str
        accumulated_word = "".dup
      when "\r", "\n"
        ret << transcribe_word(accumulated_word, out_charset) << c
        accumulated_word = "".dup
      else
        c_group = @in_charset[c]
        if c_group == current_group
          accumulated_word << c
        else
          # Group change: flush the word collected so far and start a new
          # one with the current character.
          ret << transcribe_word(accumulated_word, out_charset)
          current_group = c_group
          accumulated_word = c.dup
        end
      end
    end

    # Flush whatever is left after the last character.
    ret << transcribe_word(accumulated_word, out_charset)
    ret
  end

  # Transcribes a single word.
  #
  # The word is wrapped in WORD_BOUNDARY on both sides and consumed from the
  # left: each step asks the tree for a replacement token list and for the
  # number of characters it consumed. Empty tokens produce nothing,
  # UNKNOWN_CHAR_OUTPUT is emitted verbatim, every other token is rendered
  # through out_charset[token].str.
  def transcribe_word(word, out_charset)
    tokens = []
    remaining = WORD_BOUNDARY + word + WORD_BOUNDARY
    until remaining.length == 0
      replacement, consumed = @transcription_tree.transcribe(remaining)
      remaining = remaining[consumed..-1]
      tokens.concat(replacement)
    end

    ret = "".dup
    tokens.each do |token|
      case token
      when ""
        # Boundary / breaker markers render as nothing.
      when UNKNOWN_CHAR_OUTPUT
        ret << UNKNOWN_CHAR_OUTPUT
      else
        ret << out_charset[token].str
      end
    end
    ret
  end
end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
# Node of a character tree mapping source strings to replacements.
#
# Each node stores the character that leads to it (nil for the root), an
# optional replacement, and its child nodes in the `siblings` hash keyed by
# character. A node is "effective" when it carries a replacement.
class TranscriptionTreeNode
  attr_accessor :character, :siblings, :replacement

  def initialize(character, replacement)
    @character = character
    @replacement = replacement
    @siblings = {}
  end

  # Debug helper: prints the number of child nodes, then one line per child
  # with its key and whether it is effective.
  def _p
    puts "Node has #{@siblings.size} siblings."
    @siblings.each_pair do |key, child|
      puts "#{key}, effective: #{child.effective?}"
    end
  end

  # Debug helper: formats a chain of nodes as "[ROOT, a, b]", using "ROOT"
  # for nodes without a character.
  def _pchain(chain)
    labels = chain.map { |node| node.character || "ROOT" }
    "[" + labels.join(", ") + "]"
  end

  # True when this node carries a replacement.
  def effective?
    !@replacement.nil?
  end

  # Registers rep as the replacement for the path spelled by source,
  # creating the missing nodes along the way. A nil or empty source is
  # ignored. An existing replacement at the end of the path is overwritten.
  def add_subpath(source, rep)
    return if source.nil? || source.empty?

    node = self
    source.each_char do |cc|
      node = (node.siblings[cc] ||= TranscriptionTreeNode.new(cc, nil))
    end
    # The node reached by the last character is the effective one.
    node.replacement = rep
  end

  # Finds the longest prefix of string that ends on an effective node.
  #
  # Returns two values: the replacement and the number of characters
  # consumed. When no prefix matches, returns [UNKNOWN_CHAR_OUTPUT] and 1.
  #
  # chain collects the visited nodes; it is appended to and popped in place.
  def transcribe(string, chain = [])
    node = self
    chain << node

    # Follow the string down the tree for as long as a child exists.
    string.each_char do |cc|
      child = node.siblings[cc]
      break unless child

      node = child
      chain << node
    end

    # Walk back up to the deepest effective node; the nodes left in the
    # chain (root included) equal the number of characters consumed.
    while chain.count > 1
      last_node = chain.pop
      return last_node.replacement, chain.count if last_node.effective?
    end

    # Only the root node is left: nothing matched, return the "unknown char"
    return [UNKNOWN_CHAR_OUTPUT], 1
  end
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/glaemscribe.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
|
24
|
+
# shellwords and fileutils ship with Ruby's standard library;
# unicode_utils is an external gem dependency.
require "shellwords"
require "unicode_utils/downcase"
# The standard library feature is named "fileutils" (lowercase): requiring
# "FileUtils" raises LoadError on case-sensitive filesystems. require is
# idempotent, so no `defined?` guard is needed.
require "fileutils"
|
28
|
+
|
29
|
+
module Glaemscribe
  module API
    # Directory containing this file; every API source file is loaded
    # relative to it.
    API_PATH = File.dirname(__FILE__)

    # Files are required one by one in the order listed below.
    # NOTE(review): the order is presumably significant (later files may
    # rely on constants defined by earlier ones) — keep it unless verified.
    %w[
      debug
      constants

      fragment
      sheaf_chain_iterator
      sheaf_chain
      sheaf

      rule
      sub_rule
      rule_group

      eval
      if_tree

      transcription_tree_node

      transcription_pre_post_processor
      transcription_processor

      charset
      mode
      option

      resource_manager
      glaeml
      mode_parser
      charset_parser

      pre_processor/elvish_numbers
      pre_processor/downcase
      pre_processor/substitute
      pre_processor/rxsubstitute
      pre_processor/up_down_tehta_split
      post_processor/reverse
    ].each do |feature|
      require API_PATH + "/api/" + feature + ".rb"
    end
  end
end
|