glaemscribe 1.0.13 → 1.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/glaemresources/charsets/tengwar_ds.cst +20 -11
- data/glaemresources/modes/futhark-long-branch.glaem.disabled +101 -0
- data/glaemresources/modes/futhark-short-twig.glaem.disabled +101 -0
- data/glaemresources/modes/futhorc.glaem.disabled +123 -0
- data/glaemresources/modes/khuzdul.glaem +11 -4
- data/glaemresources/modes/oldnorse-medieval.glaem +1 -1
- data/glaemresources/modes/quenya-sarati.glaem +9 -3
- data/glaemresources/modes/quenya.glaem +215 -173
- data/glaemresources/modes/sindarin-daeron.glaem +6 -6
- data/glaemresources/modes/valarin-sarati.glaem +5 -1
- data/lib/api/fragment.rb +1 -1
- data/lib/api/mode.rb +2 -2
- data/lib/api/mode_parser.rb +6 -11
- data/lib/api/post_processor/csub.rb +64 -0
- data/lib/api/post_processor/reverse.rb +2 -3
- data/lib/api/sheaf.rb +2 -0
- data/lib/api/sheaf_chain.rb +2 -0
- data/lib/api/transcription_pre_post_processor.rb +43 -13
- data/lib/api/transcription_processor.rb +17 -29
- data/lib/api/transcription_tree_node.rb +1 -1
- data/lib/glaemscribe.rb +1 -0
- metadata +5 -1
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
class CSubPostProcessorOperator < PostProcessorOperator
|
27
|
+
attr_reader :matcher
|
28
|
+
attr_reader :triggers
|
29
|
+
|
30
|
+
def initialize(args)
|
31
|
+
super(args)
|
32
|
+
|
33
|
+
# Build our operator
|
34
|
+
@matcher = self.raw_args[0]
|
35
|
+
@triggers = Hash.new
|
36
|
+
|
37
|
+
self.raw_args.each{ |arg|
|
38
|
+
|
39
|
+
splitted = arg.split()
|
40
|
+
replacer = splitted.shift()
|
41
|
+
|
42
|
+
splitted.each{ |token|
|
43
|
+
@triggers[token] = replacer
|
44
|
+
}
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
def apply(tokens)
|
49
|
+
last_trigger_replacer = nil
|
50
|
+
tokens.each_with_index{ |token,idx|
|
51
|
+
if token == @matcher && last_trigger_replacer != nil
|
52
|
+
tokens[idx] = last_trigger_replacer
|
53
|
+
elsif @triggers[token] != nil
|
54
|
+
last_trigger_replacer = @triggers[token]
|
55
|
+
end
|
56
|
+
}
|
57
|
+
tokens
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
data/lib/api/sheaf.rb
CHANGED
data/lib/api/sheaf_chain.rb
CHANGED
@@ -25,6 +25,7 @@ module Glaemscribe
|
|
25
25
|
|
26
26
|
class PrePostProcessorOperator
|
27
27
|
attr_reader :args
|
28
|
+
attr_reader :raw_args
|
28
29
|
|
29
30
|
def initialize(raw_args)
|
30
31
|
@raw_args = raw_args
|
@@ -46,7 +47,7 @@ module Glaemscribe
|
|
46
47
|
}
|
47
48
|
end
|
48
49
|
|
49
|
-
def apply
|
50
|
+
def apply
|
50
51
|
raise "Pure virtual method, should be overloaded."
|
51
52
|
end
|
52
53
|
end
|
@@ -90,7 +91,16 @@ module Glaemscribe
|
|
90
91
|
op.finalize(trans_options)
|
91
92
|
}
|
92
93
|
end
|
93
|
-
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
class PreProcessorOperator < PrePostProcessorOperator
|
98
|
+
end
|
99
|
+
|
100
|
+
class PostProcessorOperator < PrePostProcessorOperator
|
101
|
+
end
|
102
|
+
|
103
|
+
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
94
104
|
# Apply all preprocessor rules consecutively
|
95
105
|
def apply(l)
|
96
106
|
ret = l
|
@@ -99,19 +109,39 @@ module Glaemscribe
|
|
99
109
|
}
|
100
110
|
ret
|
101
111
|
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
class PreProcessorOperator < PrePostProcessorOperator
|
106
|
-
end
|
107
|
-
|
108
|
-
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
109
|
-
end
|
110
|
-
|
111
|
-
class PostProcessorOperator < PrePostProcessorOperator
|
112
112
|
end
|
113
113
|
|
114
|
-
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
114
|
+
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
115
|
+
|
116
|
+
attr_accessor :out_space
|
117
|
+
|
118
|
+
def apply(tokens, out_charset)
|
119
|
+
|
120
|
+
out_space_str = " "
|
121
|
+
out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
|
122
|
+
|
123
|
+
# Apply filters
|
124
|
+
@operators.each{ |operator|
|
125
|
+
tokens = operator.apply(tokens)
|
126
|
+
}
|
127
|
+
|
128
|
+
# Convert output
|
129
|
+
ret = ""
|
130
|
+
tokens.each{ |token|
|
131
|
+
case token
|
132
|
+
when ""
|
133
|
+
when "*UNKNOWN"
|
134
|
+
ret += UNKNOWN_CHAR_OUTPUT
|
135
|
+
when "*SPACE"
|
136
|
+
ret += out_space_str
|
137
|
+
when "*LF"
|
138
|
+
ret += "\n"
|
139
|
+
else
|
140
|
+
ret += out_charset[token].str
|
141
|
+
end
|
142
|
+
}
|
143
|
+
ret
|
144
|
+
end
|
115
145
|
end
|
116
146
|
|
117
147
|
end
|
@@ -26,9 +26,7 @@ module Glaemscribe
|
|
26
26
|
|
27
27
|
attr_reader :rule_groups
|
28
28
|
attr_reader :mode
|
29
|
-
|
30
|
-
attr_accessor :out_space
|
31
|
-
|
29
|
+
|
32
30
|
def initialize(mode)
|
33
31
|
@mode = mode
|
34
32
|
@rule_groups = {}
|
@@ -58,7 +56,7 @@ module Glaemscribe
|
|
58
56
|
rg.in_charset.each{ |char, group|
|
59
57
|
group_for_char = @in_charset[char]
|
60
58
|
if group_for_char
|
61
|
-
mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
|
59
|
+
mode.errors << Glaeml::Error.new(-1,"Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups.")
|
62
60
|
else
|
63
61
|
@in_charset[char] = group
|
64
62
|
end
|
@@ -75,24 +73,23 @@ module Glaemscribe
|
|
75
73
|
}
|
76
74
|
end
|
77
75
|
|
78
|
-
def apply(l
|
79
|
-
ret =
|
76
|
+
def apply(l)
|
77
|
+
ret = []
|
80
78
|
current_group = nil
|
81
79
|
accumulated_word = ""
|
82
|
-
|
83
|
-
out_space_str = " "
|
84
|
-
out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
|
85
|
-
|
80
|
+
|
86
81
|
l.split("").each{ |c|
|
87
82
|
case c
|
88
83
|
when " ", "\t"
|
89
|
-
ret += transcribe_word(accumulated_word
|
90
|
-
ret +=
|
84
|
+
ret += transcribe_word(accumulated_word)
|
85
|
+
ret += ["*SPACE"]
|
91
86
|
|
92
87
|
accumulated_word = ""
|
93
|
-
when "\r"
|
94
|
-
|
95
|
-
|
88
|
+
when "\r"
|
89
|
+
# Ignore
|
90
|
+
when "\n"
|
91
|
+
ret += transcribe_word(accumulated_word)
|
92
|
+
ret += ["*LF"]
|
96
93
|
|
97
94
|
accumulated_word = ""
|
98
95
|
else
|
@@ -100,18 +97,18 @@ module Glaemscribe
|
|
100
97
|
if c_group == current_group
|
101
98
|
accumulated_word += c
|
102
99
|
else
|
103
|
-
ret += transcribe_word(accumulated_word
|
100
|
+
ret += transcribe_word(accumulated_word)
|
104
101
|
current_group = c_group
|
105
102
|
accumulated_word = c
|
106
103
|
end
|
107
104
|
end
|
108
105
|
}
|
109
106
|
# Just in case
|
110
|
-
ret += transcribe_word(accumulated_word
|
107
|
+
ret += transcribe_word(accumulated_word)
|
111
108
|
ret
|
112
109
|
end
|
113
110
|
|
114
|
-
def transcribe_word(word
|
111
|
+
def transcribe_word(word)
|
115
112
|
res = []
|
116
113
|
word = WORD_BOUNDARY + word + WORD_BOUNDARY
|
117
114
|
while word.length != 0
|
@@ -119,17 +116,8 @@ module Glaemscribe
|
|
119
116
|
word = word[len..-1]
|
120
117
|
res += r
|
121
118
|
end
|
122
|
-
|
123
|
-
res
|
124
|
-
case token
|
125
|
-
when ""
|
126
|
-
when UNKNOWN_CHAR_OUTPUT
|
127
|
-
ret += UNKNOWN_CHAR_OUTPUT
|
128
|
-
else
|
129
|
-
ret += out_charset[token].str
|
130
|
-
end
|
131
|
-
}
|
132
|
-
ret
|
119
|
+
# Return token list
|
120
|
+
res
|
133
121
|
end
|
134
122
|
|
135
123
|
end
|
data/lib/glaemscribe.rb
CHANGED
@@ -65,6 +65,7 @@ module Glaemscribe
|
|
65
65
|
require API_PATH + "/api/pre_processor/rxsubstitute.rb"
|
66
66
|
require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
|
67
67
|
require API_PATH + "/api/post_processor/reverse.rb"
|
68
|
+
require API_PATH + "/api/post_processor/csub.rb"
|
68
69
|
|
69
70
|
end
|
70
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.13
|
4
|
+
version: 1.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
@@ -44,6 +44,9 @@ files:
|
|
44
44
|
- glaemresources/modes/adunaic.glaem
|
45
45
|
- glaemresources/modes/blackspeech-annatar.glaem
|
46
46
|
- glaemresources/modes/blackspeech.glaem
|
47
|
+
- glaemresources/modes/futhark-long-branch.glaem.disabled
|
48
|
+
- glaemresources/modes/futhark-short-twig.glaem.disabled
|
49
|
+
- glaemresources/modes/futhorc.glaem.disabled
|
47
50
|
- glaemresources/modes/gothic.glaem
|
48
51
|
- glaemresources/modes/khuzdul.glaem
|
49
52
|
- glaemresources/modes/mercian.glaem
|
@@ -69,6 +72,7 @@ files:
|
|
69
72
|
- lib/api/mode.rb
|
70
73
|
- lib/api/mode_parser.rb
|
71
74
|
- lib/api/option.rb
|
75
|
+
- lib/api/post_processor/csub.rb
|
72
76
|
- lib/api/post_processor/reverse.rb
|
73
77
|
- lib/api/pre_processor/downcase.rb
|
74
78
|
- lib/api/pre_processor/elvish_numbers.rb
|