glaemscribe 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
+ class CSubPostProcessorOperator < PostProcessorOperator
27
+ attr_reader :matcher
28
+ attr_reader :triggers
29
+
30
+ def initialize(args)
31
+ super(args)
32
+
33
+ # Build our operator
34
+ @matcher = self.raw_args[0]
35
+ @triggers = Hash.new
36
+
37
+ self.raw_args.each{ |arg|
38
+
39
+ splitted = arg.split()
40
+ replacer = splitted.shift()
41
+
42
+ splitted.each{ |token|
43
+ @triggers[token] = replacer
44
+ }
45
+ }
46
+ end
47
+
48
+ def apply(tokens)
49
+ last_trigger_replacer = nil
50
+ tokens.each_with_index{ |token,idx|
51
+ if token == @matcher && last_trigger_replacer != nil
52
+ tokens[idx] = last_trigger_replacer
53
+ elsif @triggers[token] != nil
54
+ last_trigger_replacer = @triggers[token]
55
+ end
56
+ }
57
+ tokens
58
+ end
59
+ end
60
+
61
+ ResourceManager::register_post_processor_class("csub", CSubPostProcessorOperator)
62
+
63
+ end
64
+ end
@@ -24,9 +24,8 @@ module Glaemscribe
24
24
  module API
25
25
 
26
26
  class ReversePostProcessorOperator < PostProcessorOperator
27
-
28
- def apply(l)
29
- l.reverse
27
+ def apply(tokens)
28
+ tokens.reverse
30
29
  end
31
30
  end
32
31
 
data/lib/api/sheaf.rb CHANGED
@@ -35,6 +35,8 @@ module Glaemscribe
35
35
  attr_reader :sheaf_chain
36
36
  attr_reader :mode
37
37
  attr_reader :rule
38
+
39
+ attr_reader :expression
38
40
 
39
41
  SHEAF_SEPARATOR = "*"
40
42
 
@@ -49,6 +49,8 @@ module Glaemscribe
49
49
  attr_reader :mode
50
50
  attr_reader :rule
51
51
 
52
+ attr_reader :expression
53
+
52
54
  def src? ; is_src ; end
53
55
  def dst? ; !is_src ; end
54
56
 
@@ -25,6 +25,7 @@ module Glaemscribe
25
25
 
26
26
  class PrePostProcessorOperator
27
27
  attr_reader :args
28
+ attr_reader :raw_args
28
29
 
29
30
  def initialize(raw_args)
30
31
  @raw_args = raw_args
@@ -46,7 +47,7 @@ module Glaemscribe
46
47
  }
47
48
  end
48
49
 
49
- def apply(l)
50
+ def apply
50
51
  raise "Pure virtual method, should be overloaded."
51
52
  end
52
53
  end
@@ -90,7 +91,16 @@ module Glaemscribe
90
91
  op.finalize(trans_options)
91
92
  }
92
93
  end
93
-
94
+
95
+ end
96
+
97
+ class PreProcessorOperator < PrePostProcessorOperator
98
+ end
99
+
100
+ class PostProcessorOperator < PrePostProcessorOperator
101
+ end
102
+
103
+ class TranscriptionPreProcessor < TranscriptionPrePostProcessor
94
104
  # Apply all preprocessor rules consecutively
95
105
  def apply(l)
96
106
  ret = l
@@ -99,19 +109,39 @@ module Glaemscribe
99
109
  }
100
110
  ret
101
111
  end
102
-
103
- end
104
-
105
- class PreProcessorOperator < PrePostProcessorOperator
106
- end
107
-
108
- class TranscriptionPreProcessor < TranscriptionPrePostProcessor
109
- end
110
-
111
- class PostProcessorOperator < PrePostProcessorOperator
112
112
  end
113
113
 
114
- class TranscriptionPostProcessor < TranscriptionPrePostProcessor
114
+ class TranscriptionPostProcessor < TranscriptionPrePostProcessor
115
+
116
+ attr_accessor :out_space
117
+
118
+ def apply(tokens, out_charset)
119
+
120
+ out_space_str = " "
121
+ out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
122
+
123
+ # Apply filters
124
+ @operators.each{ |operator|
125
+ tokens = operator.apply(tokens)
126
+ }
127
+
128
+ # Convert output
129
+ ret = ""
130
+ tokens.each{ |token|
131
+ case token
132
+ when ""
133
+ when "*UNKNOWN"
134
+ ret += UNKNOWN_CHAR_OUTPUT
135
+ when "*SPACE"
136
+ ret += out_space_str
137
+ when "*LF"
138
+ ret += "\n"
139
+ else
140
+ ret += out_charset[token].str
141
+ end
142
+ }
143
+ ret
144
+ end
115
145
  end
116
146
 
117
147
  end
@@ -26,9 +26,7 @@ module Glaemscribe
26
26
 
27
27
  attr_reader :rule_groups
28
28
  attr_reader :mode
29
-
30
- attr_accessor :out_space
31
-
29
+
32
30
  def initialize(mode)
33
31
  @mode = mode
34
32
  @rule_groups = {}
@@ -58,7 +56,7 @@ module Glaemscribe
58
56
  rg.in_charset.each{ |char, group|
59
57
  group_for_char = @in_charset[char]
60
58
  if group_for_char
61
- mode.errors << "Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups."
59
+ mode.errors << Glaeml::Error.new(-1,"Group #{rgname} uses input character #{char} which is also used by group #{group_for_char.name}. Input charsets should not intersect between groups.")
62
60
  else
63
61
  @in_charset[char] = group
64
62
  end
@@ -75,24 +73,23 @@ module Glaemscribe
75
73
  }
76
74
  end
77
75
 
78
- def apply(l, out_charset)
79
- ret = ""
76
+ def apply(l)
77
+ ret = []
80
78
  current_group = nil
81
79
  accumulated_word = ""
82
-
83
- out_space_str = " "
84
- out_space_str = @out_space.map{ |token| out_charset[token].str }.join("") if @out_space
85
-
80
+
86
81
  l.split("").each{ |c|
87
82
  case c
88
83
  when " ", "\t"
89
- ret += transcribe_word(accumulated_word, out_charset)
90
- ret += out_space_str
84
+ ret += transcribe_word(accumulated_word)
85
+ ret += ["*SPACE"]
91
86
 
92
87
  accumulated_word = ""
93
- when "\r", "\n"
94
- ret += transcribe_word(accumulated_word, out_charset)
95
- ret += c
88
+ when "\r"
89
+ # Ignore
90
+ when "\n"
91
+ ret += transcribe_word(accumulated_word)
92
+ ret += ["*LF"]
96
93
 
97
94
  accumulated_word = ""
98
95
  else
@@ -100,18 +97,18 @@ module Glaemscribe
100
97
  if c_group == current_group
101
98
  accumulated_word += c
102
99
  else
103
- ret += transcribe_word(accumulated_word, out_charset)
100
+ ret += transcribe_word(accumulated_word)
104
101
  current_group = c_group
105
102
  accumulated_word = c
106
103
  end
107
104
  end
108
105
  }
109
106
  # Just in case
110
- ret += transcribe_word(accumulated_word, out_charset)
107
+ ret += transcribe_word(accumulated_word)
111
108
  ret
112
109
  end
113
110
 
114
- def transcribe_word(word, out_charset)
111
+ def transcribe_word(word)
115
112
  res = []
116
113
  word = WORD_BOUNDARY + word + WORD_BOUNDARY
117
114
  while word.length != 0
@@ -119,17 +116,8 @@ module Glaemscribe
119
116
  word = word[len..-1]
120
117
  res += r
121
118
  end
122
- ret = ""
123
- res.each{ |token|
124
- case token
125
- when ""
126
- when UNKNOWN_CHAR_OUTPUT
127
- ret += UNKNOWN_CHAR_OUTPUT
128
- else
129
- ret += out_charset[token].str
130
- end
131
- }
132
- ret
119
+ # Return token list
120
+ res
133
121
  end
134
122
 
135
123
  end
@@ -84,7 +84,7 @@ module Glaemscribe
84
84
  end
85
85
 
86
86
  # Only the root node is in the chain, we could not find anything; return the "unknown char"
87
- return [UNKNOWN_CHAR_OUTPUT], 1
87
+ return ["*UNKNOWN"], 1
88
88
  end
89
89
  end
90
90
  end
data/lib/glaemscribe.rb CHANGED
@@ -65,6 +65,7 @@ module Glaemscribe
65
65
  require API_PATH + "/api/pre_processor/rxsubstitute.rb"
66
66
  require API_PATH + "/api/pre_processor/up_down_tehta_split.rb"
67
67
  require API_PATH + "/api/post_processor/reverse.rb"
68
+ require API_PATH + "/api/post_processor/csub.rb"
68
69
 
69
70
  end
70
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glaemscribe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin 'Talagan' Babut
@@ -44,6 +44,9 @@ files:
44
44
  - glaemresources/modes/adunaic.glaem
45
45
  - glaemresources/modes/blackspeech-annatar.glaem
46
46
  - glaemresources/modes/blackspeech.glaem
47
+ - glaemresources/modes/futhark-long-branch.glaem.disabled
48
+ - glaemresources/modes/futhark-short-twig.glaem.disabled
49
+ - glaemresources/modes/futhorc.glaem.disabled
47
50
  - glaemresources/modes/gothic.glaem
48
51
  - glaemresources/modes/khuzdul.glaem
49
52
  - glaemresources/modes/mercian.glaem
@@ -69,6 +72,7 @@ files:
69
72
  - lib/api/mode.rb
70
73
  - lib/api/mode_parser.rb
71
74
  - lib/api/option.rb
75
+ - lib/api/post_processor/csub.rb
72
76
  - lib/api/post_processor/reverse.rb
73
77
  - lib/api/pre_processor/downcase.rb
74
78
  - lib/api/pre_processor/elvish_numbers.rb