blackwinter-athena 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,141 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'iconv'
30
+ require 'enumerator'
31
+
32
+ class Athena::Formats
33
+
34
+ module Lingo
35
+
36
+ class Base < Athena::Formats
37
+
38
+ class << self
39
+
40
# Collects the cleaned-up terms of every field of +record+.
#
# For each entry of <tt>record.struct</tt> the field's <tt>:elements</tt> are
# visited in order and each element's list in <tt>:values</tt> is read (a
# missing list counts as empty). Every value is stripped, runs of line breaks
# are collapsed into a single space, +nil+ is treated as an empty string, and
# values that end up empty are dropped.
#
# Returns an array holding one array of terms per field.
def convert(record)
  record.struct.map { |_field, spec|
    terms = []

    spec[:elements].each { |element|
      values = spec[:values][element] || []

      values.each { |value|
        term = (value || '').strip.gsub(/(?:\r?\n)+/, ' ')
        terms << term unless term.empty?
      }
    }

    terms
  }
end
49
+
50
# Flags this format as deferred; always answers +true+ (the generic
# Athena::Formats.deferred? answers +false+).
def deferred?
  true
end
53
+
54
+ private
55
+
56
# Verifies the number of terms found (+actual+) against +expected+.
#
# If a block is given it alone decides: it is called with +actual+ and its
# truthiness is the verdict (+expected+ then only appears in the message).
# Without a block, +expected+ and +actual+ must be equal.
#
# Returns +true+ when the check passes. On failure it raises
# FormatArgumentError if +blow+ is set; otherwise it emits a warning and
# returns +false+.
def check_number_of_arguments(expected, actual, blow = false, &block)
  acceptable = block ? block[actual] : expected == actual
  return true if acceptable

  msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
  raise FormatArgumentError, msg if blow

  warn msg
  false
end
68
+
69
# Strict variant of check_number_of_arguments: performs the same check with
# +blow+ switched on, so a mismatch raises instead of merely warning.
def check_number_of_arguments!(expected, actual, &block)
  blow = true
  check_number_of_arguments(expected, actual, blow, &block)
end
72
+
73
+ end
74
+
75
+ end
76
+
77
+ # "Nasenbär\n"
78
+ class SingleWord < Athena::Formats::Lingo::Base
79
+
80
+ register_formats :out, 'lingo/single_word'
81
+
82
# Converts +record+ into one flat list of terms: the per-field term arrays
# produced by the inherited +convert+ are flattened into a single array.
def self.convert(record)
  terms_per_field = super
  terms_per_field.flatten
end
85
+
86
+ end
87
+
88
+ # "John Vorhauer*Vorhauer, John\n"
89
+ class KeyValue < Athena::Formats::Lingo::Base
90
+
91
+ register_formats :out, 'lingo/key_value'
92
+
93
# Converts +record+ into "key*value" strings.
#
# Each field's terms (as produced by the inherited +convert+) must consist of
# exactly two entries; fields failing check_number_of_arguments are left out
# of the result. The remaining pairs are joined with <tt>*</tt>.
def self.convert(record)
  pairs = super.select { |terms|
    check_number_of_arguments(2, terms.size)
  }

  pairs.map { |terms| terms.join('*') }
end
100
+
101
+ end
102
+
103
+ # "Essen,essen #v Essen #s Esse #s\n"
104
+ class WordClass < Athena::Formats::Lingo::Base
105
+
106
+ register_formats :out, 'lingo/word_class'
107
+
108
# Converts +record+ into strings such as "Essen,essen #v Essen #s Esse #s".
#
# Each field's terms (as produced by the inherited +convert+) must be an odd
# number greater than one: the first term, followed by form/word-class pairs.
# Fields failing check_number_of_arguments are left out of the result.
def self.convert(record)
  usable = super.select { |terms|
    check_number_of_arguments('odd, > 1', terms.size) { |actual|
      actual > 1 && actual % 2 == 1
    }
  }

  usable.map { |terms|
    # The leading term is taken off the list; what remains are the pairs.
    base  = terms.shift
    pairs = []

    terms.each_slice(2) { |form, wc| pairs << "#{form} ##{wc}" }

    [base, pairs.join(' ')].join(',')
  }
end
119
+
120
+ end
121
+
122
+ # "Fax;Faxkopie;Telefax\n"
123
+ class MultiValue < Athena::Formats::Lingo::Base
124
+
125
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
126
+
127
# Converts +record+ into strings such as "Fax;Faxkopie;Telefax".
#
# Each field's terms (as produced by the inherited +convert+) must number more
# than one; fields failing check_number_of_arguments are left out of the
# result. The remaining term lists are joined with <tt>;</tt>.
def self.convert(record)
  usable = super.select { |terms|
    check_number_of_arguments('> 1', terms.size) { |actual| actual > 1 }
  }

  usable.map { |terms| terms.join(';') }
end
136
+
137
+ end
138
+
139
+ end
140
+
141
+ end
@@ -0,0 +1,79 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Formats
30
+
31
+ class Sisis < Athena::Formats
32
+
33
+ register_format :in, 'sisis'
34
+
35
+ attr_reader :record_element, :config, :parser
36
+
37
# Sets up the format for +parser+.
#
# Works on a copy of <tt>parser.config</tt>: the <tt>:__record_element</tt>
# entry is taken out of the copy and kept as the record element, the rest
# becomes this format's config.
#
# Raises NoRecordElementError if no record element is configured and
# IllegalRecordElementError if it is anything but a String.
def initialize(parser)
  settings = parser.config.dup
  @record_element = settings.delete(:__record_element)

  if @record_element.nil?
    raise NoRecordElementError, 'no record element specified'
  elsif !@record_element.is_a?(String)
    raise IllegalRecordElementError, "illegal record element #{@record_element}"
  end

  @config = settings
  @parser = parser
end
52
+
53
# Reads +source+ line by line and turns it into records.
#
# Every line is expected to look like <tt><digits>...: <value></tt>; the
# leading digits are the element, everything after the colon (and any
# following whitespace) is the value. A line whose element equals
# +record_element+ closes the record currently being built (if any) and
# starts a new Athena::Record with the line's value; any other element is
# handed to the current record's +update+ together with its config entry.
#
# Lines that do not match the expected shape (e.g. blank lines) are skipped,
# and element lines that appear before the first record element are ignored
# with a warning, instead of failing with a NoMethodError on +nil+.
#
# Returns the result of closing the last record, or +nil+ if +source+ did
# not contain any record.
def parse(source)
  record = nil

  source.each { |line|
    match = line.match(/(\d+).*?:\s*(.*)/)
    next unless match

    element, value = match[1, 2]

    case element
    when record_element
      record.close if record
      record = Athena::Record.new(parser.block, value)
    else
      if record
        record.update(element, value, config[element])
      else
        # There is no record yet that this element could belong to.
        warn "ignoring element #{element} outside of any record"
      end
    end
  }

  record.close if record
end
70
+
71
# Raised by #initialize when the configuration names no record element.
class NoRecordElementError < StandardError; end
73
+
74
# Raised by #initialize when the configured record element is not a String.
class IllegalRecordElementError < StandardError; end
76
+
77
+ end
78
+
79
+ end
@@ -0,0 +1,274 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'forwardable'
30
+
31
+ require 'rubygems'
32
+
33
+ require 'xmlstreamin'
34
+ require 'nuggets/hash/insert'
35
+
36
+ class Athena::Formats
37
+
38
+ class XML < Athena::Formats
39
+
40
+ include Athena::Util
41
+
42
+ register_format :in, 'xml'
43
+
44
+ attr_reader :spec, :listener
45
+
46
# Builds the spec tree for +parser+ (see build_spec) and wraps it in an
# XMLStreamin::XMLStreamListener, which #parse later hands to the XML parser.
def initialize(parser)
  stream_spec = build_spec(parser)

  @spec     = stream_spec
  @listener = XMLStreamin::XMLStreamListener.new(stream_spec)
end
50
+
51
# Streams +source+ through REXML's stream parser, with this format's
# listener receiving the parse events. Returns whatever
# REXML::Document.parse_stream returns.
def parse(source)
  REXML::Document.parse_stream(source, listener)
end
54
+
55
+ private
56
+
57
# Builds the spec tree that drives the stream listener.
#
# Works on a copy of <tt>parser.config</tt>. The <tt>:__record_element</tt>
# entry is removed from the copy and must be a String (NoRecordElementError
# if it is missing, IllegalRecordElementError for any other type). Every
# remaining entry maps an element path ("a/b/c") to its fields; for each
# field the path is walked from its last part to its first, creating one spec
# per part via define_spec, and all resulting specs are merged into the
# record's element specs via merge_specs.
#
# The result is a BaseSpec whose default is a root spec mapping the record
# element to a RecordSpec holding those element specs. With <tt>:spec</tt>
# verbosity the finished tree is printed through +inspect_spec+.
def build_spec(parser)
  settings = parser.config.dup
  record_element = settings.delete(:__record_element)

  if record_element.nil?
    raise NoRecordElementError, 'no record element specified'
  elsif !record_element.is_a?(String)
    raise IllegalRecordElementError, "illegal record element #{record_element}"
  end

  element_specs = {}

  settings.each { |element, fields|
    fields.each { |field, field_config|
      # Innermost path part first: it gets the :default treatment, every
      # further part receives the specs collected so far.
      chain = element.split('/').reverse.inject({}) { |nested, part|
        part_spec = define_spec(element, field, field_config, nested.empty? ? :default : nested)
        merge_specs(nested, part, part_spec)
      }

      chain.each { |part, part_spec|
        merge_specs(element_specs, part, part_spec)
      }
    }
  }

  record_spec = RecordSpec.new(parser)
  record_spec.specs!(element_specs)

  root_spec = BaseSpec.new
  root_spec.specs!(record_element => record_spec)

  spec = BaseSpec.new
  spec.default!(root_spec)

  verbose(:spec, BaseSpec) { spec.inspect_spec }

  spec
end
97
+
98
# Creates the ElementSpec for +element+, +field+ and +config+.
#
# If +arg+ is a Hash it is installed as the new spec's sub-specs; for any
# other +arg+ the spec gets a SubElementSpec wrapping itself as its default.
#
# Returns the new spec.
def define_spec(element, field, config, arg)
  spec = ElementSpec.new(element, field, config)

  if arg.is_a?(Hash)
    spec.specs!(arg)
  else
    spec.default!(SubElementSpec.new(spec))
  end

  spec
end
110
+
111
# Inserts +spec+ into +container+ under +key+ via <tt>insert!</tt>, resolving
# a clash with an entry that is already present as follows:
#
# * an existing entry that responds to <tt>specs!</tt> absorbs the newcomer's
#   specs (or the newcomer itself, if it has no +specs+) and stays in place;
# * any other existing entry is merged with the newcomer via +merge+.
#
# Returns whatever <tt>insert!</tt> returns.
def merge_specs(container, key, spec)
  container.insert!(key, spec) { |existing, incoming|
    next existing.merge(incoming) unless existing.respond_to?(:specs!)

    existing.specs!(incoming.respond_to?(:specs) ? incoming.specs : incoming)
    existing
  }
end
121
+
122
+ class BaseSpec < XMLStreamin::XMLSpec
123
+
124
+ include Athena::Util
125
+
126
+ @level = 0
127
+
128
# Called when an element opens. With <tt>:xml</tt> verbosity it prints the
# opening tag at the current level, steps one level down and lists the
# element's attributes. Always returns +context+ unchanged.
def start(context, name, attrs)
  verbose(:xml) {
    spit "#{indent(level)}<#{name}>"
    step :down

    attrs.each do |attr|
      spit "#{indent(level + 1)}[#{attr[0]} = #{attr[1]}]"
    end
  }

  context
end
140
+
141
# Called for character data. With <tt>:xml</tt> verbosity it prints the
# stripped text at the current level, unless nothing but whitespace is left.
# Always returns +context+ unchanged.
def text(context, data)
  verbose(:xml) {
    stripped = data.strip
    spit "#{indent(level)}#{stripped}" unless stripped.empty?
  }

  context
end
149
+
150
# Called when an element closes. With <tt>:xml</tt> verbosity it steps one
# level up and prints the closing tag at that level. Always returns
# +context+ unchanged.
def done(context, name)
  verbose(:xml) {
    step :up
    spit "#{indent(level)}</#{name}>"
  }

  context
end
158
+
159
# With <tt>:xml</tt> verbosity steps one level up; prints nothing. Always
# returns +context+ unchanged.
def empty(context)
  verbose(:xml) { step :up }

  context
end
166
+
167
# Prints this spec and, recursively, the specs below it.
#
# A spec that has a +field+ prints one line ("[element] FIELD -> name",
# indented by +level+) through +spit+ if it responds to that, through +warn+
# otherwise, and then descends into its sub-specs one level deeper. A spec
# without a +field+ prints nothing itself: it passes the call on to its
# sub-specs at the same level or, if it has none, to its default spec under
# the element name '?'.
def inspect_spec(element = nil, level = 0)
  if respond_to?(:field)
    msg = "#{indent(level)}[#{element}] #{field.to_s.upcase} -> #{name}"

    if respond_to?(:spit)
      spit(msg)
    else
      warn(msg)
    end

    specs.each { |sub_element, sub_spec|
      sub_spec.inspect_spec(sub_element, level + 1)
    }
  elsif specs.empty?
    specs.default.inspect_spec('?', level)
  else
    specs.each { |sub_element, sub_spec|
      sub_spec.inspect_spec(sub_element, level)
    }
  end
end
184
+
185
+ private
186
+
187
# Current nesting level. The counter lives in an instance variable of the
# BaseSpec class itself, so every spec instance sees the same value.
def level
  BaseSpec.instance_variable_get(:@level)
end
190
+
191
# Moves the shared nesting level one step: <tt>:down</tt> increases it,
# <tt>:up</tt> decreases it. Returns the new level.
def step(direction)
  delta = { :down => 1, :up => -1 }[direction]
  BaseSpec.instance_variable_set(:@level, level + delta)
end
195
+
196
+ end
197
+
198
# Spec for the record element: opens a fresh Athena::Record, bound to the
# parser's block, when the element starts and closes that record once the
# element is done.
class RecordSpec < BaseSpec

  attr_reader :parser, :record
  attr_writer :record

  # Remembers +parser+, whose block is handed to every record created.
  def initialize(parser)
    super()

    @parser = parser
  end

  # Runs the inherited start handling, then begins a new record.
  # Returns the new record.
  def start(context, name, attrs)
    super

    self.record = Athena::Record.new(parser.block)
  end

  # Runs the inherited done handling, then closes the current record.
  # Returns the result of closing it.
  def done(context, name)
    super

    record.close
  end

end
222
+
223
# Spec for a configured element: when the element starts it obtains its
# record through <tt>Athena::Record[field, config]</tt>, and every piece of
# text is passed to that record's +update+ under the element's name.
class ElementSpec < BaseSpec

  attr_reader :name, :field, :config, :record
  attr_writer :record

  # +name+ is the element (path) as configured, +field+ the field it feeds
  # and +config+ that field's settings.
  def initialize(name, field, config)
    super()

    @name   = name
    @field  = field
    @config = config
  end

  # Runs the inherited start handling, then looks up the record to fill.
  # Returns that record.
  def start(context, name, attrs)
    super

    self.record = Athena::Record[field, config]
  end

  # Runs the inherited text handling, then adds +data+ to the record.
  # Returns the result of the update.
  def text(context, data)
    super

    record.update(name, data)
  end

end
249
+
250
# Stand-in used as the default spec of an element: it forwards the listed
# attributes and handlers to the element spec it was created for, and
# installs itself as its own default.
class SubElementSpec < BaseSpec

  extend Forwardable

  # Everything of the parent that is used has to be listed here explicitly.
  def_delegators :@parent, :name, :field, :config, :record, :start, :text

  # +parent+ is the element spec all delegated calls end up at.
  def initialize(parent)
    super()

    @parent = parent
    default!(self)
  end

end
265
+
266
# Raised by build_spec when the configuration names no record element.
class NoRecordElementError < StandardError; end
268
+
269
# Raised by build_spec when the configured record element is not a String.
class IllegalRecordElementError < StandardError; end
271
+
272
+ end
273
+
274
+ end
@@ -0,0 +1,88 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Formats
30
+
31
+ @formats = { :in => {}, :out => {} }
32
+
33
+ class << self
34
+
35
# The registry of known formats, keyed by direction (<tt>:in</tt> /
# <tt>:out</tt>) and then by format name. It is always read from
# Athena::Formats itself, so sub-classes share one registry.
def formats
  Athena::Formats.instance_variable_get(:@formats)
end
38
+
39
# Looks up the class registered for +format+ in the given +direction+;
# +nil+ if there is none.
def [](direction, format)
  registry = formats[direction]
  registry[format]
end
42
+
43
# Tells whether +format+ has been registered for the given +direction+.
def valid_format?(direction, format)
  formats[direction].key?(format)
end
46
+
47
# Formats are not deferred unless they say so: always answers +false+ here;
# the Lingo base format overrides this to answer +true+.
def deferred?
  false
end
50
+
51
# Placeholder for the conversion every output format has to provide itself.
# Always raises NotImplementedError.
def convert(*args)
  raise NotImplementedError, 'must be defined by sub-class'
end
54
+
55
+ private
56
+
57
# Registers the receiver as the handler for +format+ in the given
# +direction+. Raises DuplicateFormatDefinitionError if that slot is already
# taken. Returns the receiver.
def register_format(direction, format)
  registry = formats[direction]
  existing = registry[format]

  if existing
    raise DuplicateFormatDefinitionError,
          "format already defined (#{direction}): #{format} = #{existing}"
  end

  registry[format] = self
end
65
+
66
# Registers the receiver under several format names at once, one
# register_format call per name, in the order given. Returns the names.
def register_formats(direction, *formats)
  formats.each do |format|
    register_format(direction, format)
  end
end
71
+
72
+ end
73
+
74
# Placeholder for the parsing every input format has to provide itself.
# Always raises NotImplementedError.
def parse(*args)
  raise NotImplementedError, 'must be defined by sub-class'
end
77
+
78
# Raised by register_format when a format name is registered a second time
# for the same direction.
class DuplicateFormatDefinitionError < StandardError; end
80
+
81
# Raised by the Lingo formats' strict argument check when a field yields the
# wrong number of terms.
class FormatArgumentError < ArgumentError; end
83
+
84
+ end
85
+
86
# Load every format implementation: all Ruby files anywhere below the
# directory that is named like this file (minus its <tt>.rb</tt> extension).
Dir[__FILE__.sub(/\.rb$/, '/**/*.rb')].each do |rb|
  require rb
end
@@ -0,0 +1,90 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Parser
30
+
31
+ include Athena::Util
32
+
33
+ DEFAULT_SEPARATOR = ', '
34
+ DEFAULT_EMPTY = '<<EMPTY>>'
35
+
36
+ attr_reader :config, :spec
37
+ attr_accessor :block
38
+
39
# Sets up a parser for the input format named +spec+.
#
# +config+ is normalized through build_config first; only then is the class
# registered for +spec+ looked up in Athena::Formats and instantiated with
# this parser, so the format already finds the finished config.
def initialize(config, spec)
  @config = build_config(config)

  format_class = Athena::Formats[:in, spec]
  @spec = format_class.new(self)
end
43
+
44
# Parses +source+ with the configured input format.
#
# The given block is stored in +block+ before parsing starts, which is where
# the formats pick it up when they create records. Returns
# <tt>Athena::Record.records</tt> once the format is done.
def parse(source, &block)
  self.block = block
  spec.parse(source)

  Athena::Record.records
end
50
+
51
+ private
52
+
53
# Turns the user-supplied +config+ (field => element spec) into the internal
# lookup table (element => { field => settings }).
#
# Keys starting with a double underscore (such as <tt>:__record_element</tt>)
# are copied over unchanged. For every other field the value may be
#
# * a String or an Array of Strings: the element(s) feeding the field;
# * a Hash with <tt>:elements</tt> (Array) or <tt>:element</tt> (single
#   element) and, optionally, <tt>:separator</tt>, <tt>:string</tt> and
#   <tt>:empty</tt>.
#
# Each element of a field ends up with a settings hash of <tt>:string</tt>
# (given, or one <tt>%s</tt> per element joined by the separator),
# <tt>:empty</tt> (given, or DEFAULT_EMPTY) and <tt>:elements</tt>.
#
# Raises ArgumentError for a value of any other type, or if
# <tt>:elements</tt> is not an Array.
def build_config(config)
  config.inject({}) { |hash, (field, v)|
    if field.to_s =~ /^__/
      hash.merge(field => v)
    else
      case v
      when String, Array
        elements = [*v]
        v = {}
      when Hash
        # Splat rather than +to_a+: String#to_a no longer exists as of
        # Ruby 1.9, so <tt>:element => 'name'</tt> used to blow up there.
        # NOTE(review): a Hash naming neither :elements nor :element yields
        # an empty list and the field is silently dropped; going by the
        # message below that was probably meant to raise -- confirm.
        elements = v[:elements] || [*v[:element]]

        raise ArgumentError, "no elements specified for field #{field}" \
          unless elements.is_a?(Array)
      else
        raise ArgumentError, "illegal value for field #{field}"
      end

      separator = v[:separator] || DEFAULT_SEPARATOR

      elements.each { |element|
        verbose(:config) do
          spit "#{field.to_s.upcase} -> #{element}"
        end

        (hash[element] ||= {})[field] = {
          # ['%s'] * n repeats the placeholder, * separator then joins.
          :string   => v[:string] || ['%s'] * elements.size * separator,
          :empty    => v[:empty]  || DEFAULT_EMPTY,
          :elements => elements
        }
      }

      hash
    end
  }
end
89
+
90
+ end