glaemscribe 1.0.13 → 1.0.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
# Post-processor operator performing a context-dependent substitution
# ("csub") on a list of output tokens.
#
# The first raw argument is the matcher token. Every raw argument is parsed
# as a whitespace-separated list "REPLACER TRIGGER1 TRIGGER2 ...", and each
# trigger token is mapped to its replacer. When applied, every occurrence of
# the matcher is replaced by the replacer of the most recently seen trigger
# token; a matcher met before any trigger is left untouched.
class CSubPostProcessorOperator < PostProcessorOperator
  attr_reader :matcher
  attr_reader :triggers

  # @param args [Array<String>] raw operator arguments, passed on to the
  #   parent constructor (presumably strings, since each one is split on
  #   whitespace below -- confirm against the mode parser)
  def initialize(args)
    super(args)

    @matcher  = raw_args[0]
    @triggers = {}

    # NOTE(review): the first argument (the matcher) is parsed as a trigger
    # list too, exactly as before; this is a no-op when it is a single token.
    raw_args.each do |arg|
      replacer, *trigger_tokens = arg.split
      trigger_tokens.each { |token| @triggers[token] = replacer }
    end
  end

  # Replaces each matcher token by the replacer of the last trigger seen.
  #
  # The incoming array is no longer modified in place: a new array is
  # returned, so the caller's token list is not clobbered as a side effect.
  #
  # @param tokens [Array<String>] tokens to filter
  # @return [Array<String>] a new token list with the substitutions applied
  def apply(tokens)
    last_trigger_replacer = nil

    tokens.map do |token|
      if token == @matcher && last_trigger_replacer
        last_trigger_replacer
      else
        # Remember the replacer tied to this token, if it is a trigger.
        replacer = @triggers[token]
        last_trigger_replacer = replacer if replacer
        token
      end
    end
  end
end
60
+
61
+ ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
62
+
63
+ end
64
+ end
@@ -24,9 +24,8 @@ module Glaemscribe
24
24
  module API
25
25
 
26
26
  class ReversePostProcessorOperator < PostProcessorOperator
27
-
28
- def apply(l)
29
- l.reverse
27
+ def apply(tokens)
28
+ tokens.reverse
30
29
  end
31
30
  end
32
31
 
data/lib/api/sheaf.rb CHANGED
@@ -35,6 +35,8 @@ module Glaemscribe
35
35
  attr_reader :sheaf_chain
36
36
  attr_reader :mode
37
37
  attr_reader :rule
38
+
39
+ attr_reader :expression
38
40
 
39
41
  SHEAF_SEPARATOR = "*"
40
42
 
@@ -49,6 +49,8 @@ module Glaemscribe
49
49
  attr_reader :mode
50
50
  attr_reader :rule
51
51
 
52
+ attr_reader :expression
53
+
52
54
  def src? ; is_src ; end
53
55
  def dst? ; !is_src ; end
54
56
 
@@ -25,6 +25,7 @@ module Glaemscribe
25
25
 
26
26
  class PrePostProcessorOperator
27
27
  attr_reader :args
28
+ attr_reader :raw_args
28
29
 
29
30
  def initialize(raw_args)
30
31
  @raw_args = raw_args
@@ -46,7 +47,7 @@ module Glaemscribe
46
47
  }
47
48
  end
48
49
 
49
- def apply(l)
50
+ def apply
50
51
  raise "Pure virtual method, should be overloaded."
51
52
  end
52
53
  end
@@ -90,7 +91,16 @@ module Glaemscribe
90
91
  op.finalize(trans_options)
91
92
  }
92
93
  end
93
-
94
+
95
+ end
96
+
97
+ class PreProcessorOperator < PrePostProcessorOperator
98
+ end
99
+
100
+ class PostProcessorOperator < PrePostProcessorOperator
101
+ end
102
+
103
+ class TranscriptionPreProcessor < TranscriptionPrePostProcessor
94
104
  # Apply all preprocessor rules consecutively
95
105
  def apply(l)
96
106
  ret = l
@@ -99,19 +109,39 @@ module Glaemscribe
99
109
  }
100
110
  ret
101
111
  end
102
-
103
- end
104
-
105
- class PreProcessorOperator < PrePostProcessorOperator
106
- end
107
-
108
- class TranscriptionPreProcessor < TranscriptionPrePostProcessor
109
- end
110
-
111
- class PostProcessorOperator < PrePostProcessorOperator
112
112
  end
113
113
 
114
- class TranscriptionPostProcessor < TranscriptionPrePostProcessor
114
# Final stage of a transcription: runs the post-processor operators over the
# token list, then renders the tokens into the output string.
class TranscriptionPostProcessor < TranscriptionPrePostProcessor

  # Optional list of charset tokens rendered in place of a plain space.
  attr_accessor :out_space

  # Applies every operator in order, then converts the tokens to a string.
  #
  # Special tokens: "" renders nothing, "*UNKNOWN" renders
  # UNKNOWN_CHAR_OUTPUT, "*SPACE" renders the output space string and "*LF"
  # renders a newline. Any other token is looked up in out_charset and
  # rendered through its #str.
  #
  # @param tokens [Array<String>] tokens produced by the processor
  # @param out_charset [#[]] charset mapping a token to an object
  #   responding to #str
  # @return [String] the rendered output
  def apply(tokens, out_charset)
    # A plain space unless the mode defines its own output space tokens.
    out_space_str =
      if @out_space
        @out_space.map { |token| out_charset[token].str }.join
      else
        " "
      end

    # Thread the token list through each operator in turn.
    filtered = @operators.reduce(tokens) { |current, operator| operator.apply(current) }

    # Render each token, then join once instead of growing a string with +=.
    rendered = filtered.map do |token|
      case token
      when ""         then ""
      when "*UNKNOWN" then UNKNOWN_CHAR_OUTPUT
      when "*SPACE"   then out_space_str
      when "*LF"      then "\n"
      else out_charset[token].str
      end
    end

    rendered.join
  end
end
116
146
 
117
147
  end
@@ -26,9 +26,7 @@ module Glaemscribe
26
26
 
27
27
  attr_reader :rule_groups
28
28
  attr_reader :mode
29
-
30
- attr_accessor :out_space
31
-
29
+
32
30
  def initialize(mode)
33
31
  @mode = mode
34
32
  @rule_groups = {}
@@ -58,7 +56,7 @@ module Glaemscribe
58
56
  rg.in_charset.each{ |char, group|
59
57
  group_for_char = @in_charset[char]
60
58
  if group_for_char
61
- mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
59
+ mode.errors << Glaeml::Error.new(-1,"Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups.")
62
60
  else
63
61
  @in_charset[char] = group
64
62
  end
@@ -75,24 +73,23 @@ module Glaemscribe
75
73
  }
76
74
  end
77
75
 
78
- def apply(l, out_charset)
79
- ret = ""
76
+ def apply(l)
77
+ ret = []
80
78
  current_group = nil
81
79
  accumulated_word = ""
82
-
83
- out_space_str = " "
84
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
85
-
80
+
86
81
  l.split("").each{ |c|
87
82
  case c
88
83
  when " ", "\t"
89
- ret += transcribe_word(accumulated_word, out_charset)
90
- ret += out_space_str
84
+ ret += transcribe_word(accumulated_word)
85
+ ret += ["*SPACE"]
91
86
 
92
87
  accumulated_word = ""
93
- when "\r", "\n"
94
- ret += transcribe_word(accumulated_word, out_charset)
95
- ret += c
88
+ when "\r"
89
+ # Ignore
90
+ when "\n"
91
+ ret += transcribe_word(accumulated_word)
92
+ ret += ["*LF"]
96
93
 
97
94
  accumulated_word = ""
98
95
  else
@@ -100,18 +97,18 @@ module Glaemscribe
100
97
  if c_group == current_group
101
98
  accumulated_word += c
102
99
  else
103
- ret += transcribe_word(accumulated_word, out_charset)
100
+ ret += transcribe_word(accumulated_word)
104
101
  current_group = c_group
105
102
  accumulated_word = c
106
103
  end
107
104
  end
108
105
  }
109
106
  # Just in case
110
- ret += transcribe_word(accumulated_word, out_charset)
107
+ ret += transcribe_word(accumulated_word)
111
108
  ret
112
109
  end
113
110
 
114
- def transcribe_word(word, out_charset)
111
+ def transcribe_word(word)
115
112
  res = []
116
113
  word = WORD_BOUNDARY + word + WORD_BOUNDARY
117
114
  while word.length != 0
@@ -119,17 +116,8 @@ module Glaemscribe
119
116
  word = word[len..-1]
120
117
  res += r
121
118
  end
122
- ret = ""
123
- res.each{ |token|
124
- case token
125
- when ""
126
- when UNKNOWN_CHAR_OUTPUT
127
- ret += UNKNOWN_CHAR_OUTPUT
128
- else
129
- ret += out_charset[token].str
130
- end
131
- }
132
- ret
119
+ # Return token list
120
+ res
133
121
  end
134
122
 
135
123
  end
@@ -84,7 +84,7 @@ module Glaemscribe
84
84
  end
85
85
 
86
86
  # Only the root node is in the chain, we could not find anything; return the "unknown char"
87
- return [UNKNOWN_CHAR_OUTPUT], 1
87
+ return ["*UNKNOWN"], 1
88
88
  end
89
89
  end
90
90
  end
data/lib/glaemscribe.rb CHANGED
@@ -65,6 +65,7 @@ module Glaemscribe
65
65
  require API_PATH + "/api/pre_processor/rxsubstitute.rb"
66
66
  require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
67
67
  require API_PATH + "/api/post_processor/reverse.rb"
68
+ require API_PATH + "/api/post_processor/csub.rb"
68
69
 
69
70
  end
70
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glaemscribe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin 'Talagan' Babut
@@ -44,6 +44,9 @@ files:
44
44
  - glaemresources/modes/adunaic.glaem
45
45
  - glaemresources/modes/blackspeech-annatar.glaem
46
46
  - glaemresources/modes/blackspeech.glaem
47
+ - glaemresources/modes/futhark-long-branch.glaem.disabled
48
+ - glaemresources/modes/futhark-short-twig.glaem.disabled
49
+ - glaemresources/modes/futhorc.glaem.disabled
47
50
  - glaemresources/modes/gothic.glaem
48
51
  - glaemresources/modes/khuzdul.glaem
49
52
  - glaemresources/modes/mercian.glaem
@@ -69,6 +72,7 @@ files:
69
72
  - lib/api/mode.rb
70
73
  - lib/api/mode_parser.rb
71
74
  - lib/api/option.rb
75
+ - lib/api/post_processor/csub.rb
72
76
  - lib/api/post_processor/reverse.rb
73
77
  - lib/api/pre_processor/downcase.rb
74
78
  - lib/api/pre_processor/elvish_numbers.rb