glaemscribe 1.0.13 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/glaemresources/charsets/tengwar_ds.cst +20 -11
- data/glaemresources/modes/futhark-long-branch.glaem.disabled +101 -0
- data/glaemresources/modes/futhark-short-twig.glaem.disabled +101 -0
- data/glaemresources/modes/futhorc.glaem.disabled +123 -0
- data/glaemresources/modes/khuzdul.glaem +11 -4
- data/glaemresources/modes/oldnorse-medieval.glaem +1 -1
- data/glaemresources/modes/quenya-sarati.glaem +9 -3
- data/glaemresources/modes/quenya.glaem +215 -173
- data/glaemresources/modes/sindarin-daeron.glaem +6 -6
- data/glaemresources/modes/valarin-sarati.glaem +5 -1
- data/lib/api/fragment.rb +1 -1
- data/lib/api/mode.rb +2 -2
- data/lib/api/mode_parser.rb +6 -11
- data/lib/api/post_processor/csub.rb +64 -0
- data/lib/api/post_processor/reverse.rb +2 -3
- data/lib/api/sheaf.rb +2 -0
- data/lib/api/sheaf_chain.rb +2 -0
- data/lib/api/transcription_pre_post_processor.rb +43 -13
- data/lib/api/transcription_processor.rb +17 -29
- data/lib/api/transcription_tree_node.rb +1 -1
- data/lib/glaemscribe.rb +1 -0
- metadata +5 -1
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
# the transcription of texts between writing systems, and more
|
5
|
+
# specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
# invented languages to some of his devised writing systems.
|
7
|
+
#
|
8
|
+
# Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
#
|
10
|
+
# This program is free software: you can redistribute it and/or modify
|
11
|
+
# it under the terms of the GNU Affero General Public License as published by
|
12
|
+
# the Free Software Foundation, either version 3 of the License, or
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful,
|
16
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
# GNU Affero General Public License for more details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License
|
21
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
module Glaemscribe
|
24
|
+
module API
|
25
|
+
|
26
|
+
class CSubPostProcessorOperator < PostProcessorOperator
|
27
|
+
attr_reader :matcher
|
28
|
+
attr_reader :triggers
|
29
|
+
|
30
|
+
def initialize(args)
|
31
|
+
super(args)
|
32
|
+
|
33
|
+
# Build our operator
|
34
|
+
@matcher = self.raw_args[0]
|
35
|
+
@triggers = Hash.new
|
36
|
+
|
37
|
+
self.raw_args.each{ |arg|
|
38
|
+
|
39
|
+
splitted = arg.split()
|
40
|
+
replacer = splitted.shift()
|
41
|
+
|
42
|
+
splitted.each{ |token|
|
43
|
+
@triggers[token] = replacer
|
44
|
+
}
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
def apply(tokens)
|
49
|
+
last_trigger_replacer = nil
|
50
|
+
tokens.each_with_index{ |token,idx|
|
51
|
+
if token == @matcher && last_trigger_replacer != nil
|
52
|
+
tokens[idx] = last_trigger_replacer
|
53
|
+
elsif @triggers[token] != nil
|
54
|
+
last_trigger_replacer = @triggers[token]
|
55
|
+
end
|
56
|
+
}
|
57
|
+
tokens
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
data/lib/api/sheaf.rb
CHANGED
data/lib/api/sheaf_chain.rb
CHANGED
@@ -25,6 +25,7 @@ module Glaemscribe
|
|
25
25
|
|
26
26
|
class PrePostProcessorOperator
|
27
27
|
attr_reader :args
|
28
|
+
attr_reader :raw_args
|
28
29
|
|
29
30
|
def initialize(raw_args)
|
30
31
|
@raw_args = raw_args
|
@@ -46,7 +47,7 @@ module Glaemscribe
|
|
46
47
|
}
|
47
48
|
end
|
48
49
|
|
49
|
-
def apply
|
50
|
+
def apply
|
50
51
|
raise "Pure virtual method, should be overloaded."
|
51
52
|
end
|
52
53
|
end
|
@@ -90,7 +91,16 @@ module Glaemscribe
|
|
90
91
|
op.finalize(trans_options)
|
91
92
|
}
|
92
93
|
end
|
93
|
-
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
class PreProcessorOperator < PrePostProcessorOperator
|
98
|
+
end
|
99
|
+
|
100
|
+
class PostProcessorOperator < PrePostProcessorOperator
|
101
|
+
end
|
102
|
+
|
103
|
+
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
94
104
|
# Apply all preprocessor rules consecutively
|
95
105
|
def apply(l)
|
96
106
|
ret = l
|
@@ -99,19 +109,39 @@ module Glaemscribe
|
|
99
109
|
}
|
100
110
|
ret
|
101
111
|
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
class PreProcessorOperator < PrePostProcessorOperator
|
106
|
-
end
|
107
|
-
|
108
|
-
class TranscriptionPreProcessor < TranscriptionPrePostProcessor
|
109
|
-
end
|
110
|
-
|
111
|
-
class PostProcessorOperator < PrePostProcessorOperator
|
112
112
|
end
|
113
113
|
|
114
|
-
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
114
|
+
class TranscriptionPostProcessor < TranscriptionPrePostProcessor
|
115
|
+
|
116
|
+
attr_accessor :out_space
|
117
|
+
|
118
|
+
def apply(tokens, out_charset)
|
119
|
+
|
120
|
+
out_space_str = " "
|
121
|
+
out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
|
122
|
+
|
123
|
+
# Apply filters
|
124
|
+
@operators.each{ |operator|
|
125
|
+
tokens = operator.apply(tokens)
|
126
|
+
}
|
127
|
+
|
128
|
+
# Convert output
|
129
|
+
ret = ""
|
130
|
+
tokens.each{ |token|
|
131
|
+
case token
|
132
|
+
when ""
|
133
|
+
when "*UNKNOWN"
|
134
|
+
ret += UNKNOWN_CHAR_OUTPUT
|
135
|
+
when "*SPACE"
|
136
|
+
ret += out_space_str
|
137
|
+
when "*LF"
|
138
|
+
ret += "\n"
|
139
|
+
else
|
140
|
+
ret += out_charset[token].str
|
141
|
+
end
|
142
|
+
}
|
143
|
+
ret
|
144
|
+
end
|
115
145
|
end
|
116
146
|
|
117
147
|
end
|
@@ -26,9 +26,7 @@ module Glaemscribe
|
|
26
26
|
|
27
27
|
attr_reader :rule_groups
|
28
28
|
attr_reader :mode
|
29
|
-
|
30
|
-
attr_accessor :out_space
|
31
|
-
|
29
|
+
|
32
30
|
def initialize(mode)
|
33
31
|
@mode = mode
|
34
32
|
@rule_groups = {}
|
@@ -58,7 +56,7 @@ module Glaemscribe
|
|
58
56
|
rg.in_charset.each{ |char, group|
|
59
57
|
group_for_char = @in_charset[char]
|
60
58
|
if group_for_char
|
61
|
-
mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
|
59
|
+
mode.errors << Glaeml::Error.new(-1,"Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups.")
|
62
60
|
else
|
63
61
|
@in_charset[char] = group
|
64
62
|
end
|
@@ -75,24 +73,23 @@ module Glaemscribe
|
|
75
73
|
}
|
76
74
|
end
|
77
75
|
|
78
|
-
def apply(l
|
79
|
-
ret =
|
76
|
+
def apply(l)
|
77
|
+
ret = []
|
80
78
|
current_group = nil
|
81
79
|
accumulated_word = ""
|
82
|
-
|
83
|
-
out_space_str = " "
|
84
|
-
out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
|
85
|
-
|
80
|
+
|
86
81
|
l.split("").each{ |c|
|
87
82
|
case c
|
88
83
|
when " ", "\t"
|
89
|
-
ret += transcribe_word(accumulated_word
|
90
|
-
ret +=
|
84
|
+
ret += transcribe_word(accumulated_word)
|
85
|
+
ret += ["*SPACE"]
|
91
86
|
|
92
87
|
accumulated_word = ""
|
93
|
-
when "\r"
|
94
|
-
|
95
|
-
|
88
|
+
when "\r"
|
89
|
+
# Ignore
|
90
|
+
when "\n"
|
91
|
+
ret += transcribe_word(accumulated_word)
|
92
|
+
ret += ["*LF"]
|
96
93
|
|
97
94
|
accumulated_word = ""
|
98
95
|
else
|
@@ -100,18 +97,18 @@ module Glaemscribe
|
|
100
97
|
if c_group == current_group
|
101
98
|
accumulated_word += c
|
102
99
|
else
|
103
|
-
ret += transcribe_word(accumulated_word
|
100
|
+
ret += transcribe_word(accumulated_word)
|
104
101
|
current_group = c_group
|
105
102
|
accumulated_word = c
|
106
103
|
end
|
107
104
|
end
|
108
105
|
}
|
109
106
|
# Just in case
|
110
|
-
ret += transcribe_word(accumulated_word
|
107
|
+
ret += transcribe_word(accumulated_word)
|
111
108
|
ret
|
112
109
|
end
|
113
110
|
|
114
|
-
def transcribe_word(word
|
111
|
+
def transcribe_word(word)
|
115
112
|
res = []
|
116
113
|
word = WORD_BOUNDARY + word + WORD_BOUNDARY
|
117
114
|
while word.length != 0
|
@@ -119,17 +116,8 @@ module Glaemscribe
|
|
119
116
|
word = word[len..-1]
|
120
117
|
res += r
|
121
118
|
end
|
122
|
-
|
123
|
-
res
|
124
|
-
case token
|
125
|
-
when ""
|
126
|
-
when UNKNOWN_CHAR_OUTPUT
|
127
|
-
ret += UNKNOWN_CHAR_OUTPUT
|
128
|
-
else
|
129
|
-
ret += out_charset[token].str
|
130
|
-
end
|
131
|
-
}
|
132
|
-
ret
|
119
|
+
# Return token list
|
120
|
+
res
|
133
121
|
end
|
134
122
|
|
135
123
|
end
|
data/lib/glaemscribe.rb
CHANGED
@@ -65,6 +65,7 @@ module Glaemscribe
|
|
65
65
|
require API_PATH + "/api/pre_processor/rxsubstitute.rb"
|
66
66
|
require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
|
67
67
|
require API_PATH + "/api/post_processor/reverse.rb"
|
68
|
+
require API_PATH + "/api/post_processor/csub.rb"
|
68
69
|
|
69
70
|
end
|
70
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: glaemscribe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.13
|
4
|
+
version: 1.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin 'Talagan' Babut
|
@@ -44,6 +44,9 @@ files:
|
|
44
44
|
- glaemresources/modes/adunaic.glaem
|
45
45
|
- glaemresources/modes/blackspeech-annatar.glaem
|
46
46
|
- glaemresources/modes/blackspeech.glaem
|
47
|
+
- glaemresources/modes/futhark-long-branch.glaem.disabled
|
48
|
+
- glaemresources/modes/futhark-short-twig.glaem.disabled
|
49
|
+
- glaemresources/modes/futhorc.glaem.disabled
|
47
50
|
- glaemresources/modes/gothic.glaem
|
48
51
|
- glaemresources/modes/khuzdul.glaem
|
49
52
|
- glaemresources/modes/mercian.glaem
|
@@ -69,6 +72,7 @@ files:
|
|
69
72
|
- lib/api/mode.rb
|
70
73
|
- lib/api/mode_parser.rb
|
71
74
|
- lib/api/option.rb
|
75
|
+
- lib/api/post_processor/csub.rb
|
72
76
|
- lib/api/post_processor/reverse.rb
|
73
77
|
- lib/api/pre_processor/downcase.rb
|
74
78
|
- lib/api/pre_processor/elvish_numbers.rb
|