blackwinter-athena 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'iconv'
30
+ require 'enumerator'
31
+
32
+ class Athena::Formats
33
+
34
+ module Lingo
35
+
36
+ class Base < Athena::Formats
37
+
38
class << self

  # Turns +record+ into one list of terms per field.
  #
  # +record.struct+ is walked as (field, struct) pairs. For every name in
  # <tt>struct[:elements]</tt> the values found under
  # <tt>struct[:values][name]</tt> (nil is treated as "no values") are
  # stripped, runs of line breaks are collapsed into a single space, and
  # values that end up empty are dropped.
  #
  # Returns an Array holding one Array of Strings per field.
  def convert(record)
    record.struct.map { |_field, struct|
      terms = []

      struct[:elements].each { |element|
        (struct[:values][element] || []).each { |value|
          term = (value || '').strip.gsub(/(?:\r?\n)+/, ' ')
          terms << term unless term.empty?
        }
      }

      terms
    }
  end

  # Lingo formats always report themselves as deferred.
  def deferred?
    true
  end

  private

  # Checks +actual+ against +expected+, or against the given block when one
  # is passed (the block receives +actual+; +expected+ is then only used for
  # the message).
  #
  # Returns true when the check passes. Otherwise raises FormatArgumentError
  # if +blow+ is set, or emits a warning and returns false.
  def check_number_of_arguments(expected, actual, blow = false, &block)
    satisfied = block ? block[actual] : expected == actual
    return true if satisfied

    msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
    raise FormatArgumentError, msg if blow

    warn msg
    false
  end

  # Same as check_number_of_arguments, but always raises on a mismatch.
  def check_number_of_arguments!(expected, actual, &block)
    check_number_of_arguments(expected, actual, true, &block)
  end

end
74
+
75
+ end
76
+
77
+ # "Nasenbär\n"
78
+ class SingleWord < Athena::Formats::Lingo::Base
79
+
80
+ register_formats :out, 'lingo/single_word'
81
+
82
# Emits every term of every field as its own entry: the per-field term
# lists produced by the parent implementation are merged into one flat list.
def self.convert(record)
  terms_per_field = super
  terms_per_field.flatten
end
85
+
86
+ end
87
+
88
+ # "John Vorhauer*Vorhauer, John\n"
89
+ class KeyValue < Athena::Formats::Lingo::Base
90
+
91
+ register_formats :out, 'lingo/key_value'
92
+
93
# Builds one "key*value" entry per field. Fields that do not yield exactly
# two terms fail the argument check (which warns) and are left out.
def self.convert(record)
  pairs = super.select { |terms| check_number_of_arguments(2, terms.size) }
  pairs.map { |terms| terms.join('*') }
end
100
+
101
+ end
102
+
103
+ # "Essen,essen #v Essen #s Esse #s\n"
104
+ class WordClass < Athena::Formats::Lingo::Base
105
+
106
+ register_formats :out, 'lingo/word_class'
107
+
108
# Builds one "base,form #class form #class ..." entry per field.
#
# The first term is the base word; the remaining terms are consumed in
# pairs of (form, word class). Fields whose term count is not odd and
# greater than one fail the argument check (which warns) and are left out.
def self.convert(record)
  usable = super.select { |terms|
    check_number_of_arguments('odd, > 1', terms.size) { |count|
      count > 1 && count % 2 == 1
    }
  }

  usable.map { |terms|
    base  = terms.shift
    forms = terms.to_enum(:each_slice, 2).map { |form, wc| "#{form} ##{wc}" }

    [base, forms.join(' ')].join(',')
  }
end
119
+
120
+ end
121
+
122
+ # "Fax;Faxkopie;Telefax\n"
123
+ class MultiValue < Athena::Formats::Lingo::Base
124
+
125
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
126
+
127
# Builds one "a;b;c" entry per field. Fields with fewer than two terms fail
# the argument check (which warns) and are left out.
def self.convert(record)
  usable = super.select { |terms|
    check_number_of_arguments('> 1', terms.size) { |count| count > 1 }
  }

  usable.map { |terms| terms.join(';') }
end
136
+
137
+ end
138
+
139
+ end
140
+
141
+ end
@@ -0,0 +1,79 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Formats
30
+
31
+ class Sisis < Athena::Formats
32
+
33
+ register_format :in, 'sisis'
34
+
35
+ attr_reader :record_element, :config, :parser
36
+
37
# Sets up the format from +parser+.
#
# Works on a copy of <tt>parser.config</tt>: the <tt>:__record_element</tt>
# entry is removed from that copy and must be a String. The remaining
# entries become this format's +config+.
#
# Raises NoRecordElementError when the entry is missing (nil) and
# IllegalRecordElementError when it is anything other than a String.
def initialize(parser)
  settings = parser.config.dup
  @record_element = settings.delete(:__record_element)

  unless @record_element.is_a?(String)
    raise NoRecordElementError, 'no record element specified' if @record_element.nil?
    raise IllegalRecordElementError, "illegal record element #{@record_element}"
  end

  @config = settings
  @parser = parser
end
52
+
53
# Reads +source+ line by line (anything responding to +each+) and feeds the
# fields into Athena::Record objects.
#
# Each line is expected to look like "<digits>...: <value>"; the leading run
# of digits is the element name. A line whose element equals +record_element+
# closes the record in progress (if any) and starts a new one, passing the
# line's value as second argument to Athena::Record.new. Every other line
# updates the record in progress.
#
# Lines that do not match the pattern (blank lines, stray text), as well as
# field lines that appear before the first record element, are skipped.
#
# Returns the result of closing the last record, or nil if no record was
# ever started.
def parse(source)
  record = nil

  source.each { |line|
    # MatchData is nil for non-matching lines; indexing it would raise.
    match = line.match(/(\d+).*?:\s*(.*)/)
    next unless match

    element, value = match[1, 2]

    case element
    when record_element
      record.close if record
      record = Athena::Record.new(parser.block, value)
    else
      # No record to attach the field to until the first record element
      # has been seen.
      record.update(element, value, config[element]) if record
    end
  }

  record.close if record
end
70
+
71
# Raised by #initialize when the configuration has no :__record_element.
class NoRecordElementError < StandardError; end

# Raised by #initialize when :__record_element is not a String.
class IllegalRecordElementError < StandardError; end
76
+
77
+ end
78
+
79
+ end
@@ -0,0 +1,274 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'forwardable'
30
+
31
+ require 'rubygems'
32
+
33
+ require 'xmlstreamin'
34
+ require 'nuggets/hash/insert'
35
+
36
+ class Athena::Formats
37
+
38
+ class XML < Athena::Formats
39
+
40
+ include Athena::Util
41
+
42
+ register_format :in, 'xml'
43
+
44
+ attr_reader :spec, :listener
45
+
46
# Builds the spec tree for +parser+ (see build_spec) and wraps it in an
# XMLStreamin stream listener; both are kept for the +spec+ and +listener+
# readers.
def initialize(parser)
  @listener = XMLStreamin::XMLStreamListener.new(@spec = build_spec(parser))
end
50
+
51
# Stream-parses +source+ with REXML, dispatching all events to +listener+.
# Returns whatever REXML::Document.parse_stream returns.
def parse(source)
  REXML::Document.parse_stream(source, listener)
end
54
+
55
+ private
56
+
57
# Builds the complete spec tree for +parser+.
#
# Works on a copy of <tt>parser.config</tt>. The <tt>:__record_element</tt>
# entry is removed from that copy and must be a String (otherwise
# NoRecordElementError or IllegalRecordElementError is raised). Every other
# entry maps an element path ("a/b/c") to a hash of field => field config.
#
# The result is a BaseSpec whose default is a root BaseSpec that maps the
# record element to a RecordSpec carrying the per-element specs.
def build_spec(parser)
  settings = parser.config.dup
  record_element = settings.delete(:__record_element)

  unless record_element.is_a?(String)
    raise NoRecordElementError, 'no record element specified' if record_element.nil?
    raise IllegalRecordElementError, "illegal record element #{record_element}"
  end

  element_specs = {}

  settings.each { |element, element_spec|
    element_spec.each { |field, c|
      # Walk the path from the innermost part outwards; the innermost part
      # gets the :default marker, every outer part receives the hash built
      # so far.
      path_specs = element.split('/').reverse.inject({}) { |hash, part|
        s = define_spec(element, field, c, hash.empty? ? :default : hash)
        merge_specs(hash, part, s)
      }

      path_specs.each { |key, s| merge_specs(element_specs, key, s) }
    }
  }

  record_spec = RecordSpec.new(parser)
  record_spec.specs!(element_specs)

  root_spec = BaseSpec.new
  root_spec.specs!(record_element => record_spec)

  spec = BaseSpec.new
  spec.default!(root_spec)

  verbose(:spec, BaseSpec) { spec.inspect_spec }

  spec
end
97
+
98
# Creates the ElementSpec for +element+ / +field+ / +config+.
#
# When +arg+ is a Hash it is installed as the new spec's sub-specs; for any
# other +arg+ the spec's default becomes a SubElementSpec wrapping it.
# Returns the new ElementSpec.
def define_spec(element, field, config, arg)
  spec = ElementSpec.new(element, field, config)

  if arg.is_a?(Hash)
    spec.specs!(arg)
  else
    spec.default!(SubElementSpec.new(spec))
  end

  spec
end
110
+
111
# Inserts +spec+ into +container+ under +key+ via Hash#insert! (from
# nuggets), passing a block that combines an existing entry with the
# incoming one:
#
# * an existing entry that responds to +specs!+ absorbs the incoming
#   entry's +specs+ (or the incoming object itself when it has none) and
#   stays in place;
# * anything else is combined with +merge+.
#
# Returns whatever <tt>insert!</tt> returns.
def merge_specs(container, key, spec)
  container.insert!(key, spec) { |existing, incoming|
    next existing.merge(incoming) unless existing.respond_to?(:specs!)

    existing.specs!(incoming.respond_to?(:specs) ? incoming.specs : incoming)
    existing
  }
end
121
+
122
# Common behaviour of all specs: the stream callbacks hand back the context
# they were given and, inside verbose(:xml) blocks, print an indented trace
# of the document structure.
class BaseSpec < XMLStreamin::XMLSpec

  include Athena::Util

  # Current trace depth. Stored on BaseSpec itself, so it is shared by all
  # spec instances, subclasses included (see #level and #step).
  @level = 0

  # Element opened: traces the tag and its attributes, one level deeper.
  def start(context, name, attrs)
    verbose(:xml) {
      spit "#{indent(level)}<#{name}>"
      step :down

      attrs.each { |attr|
        spit "#{indent(level + 1)}[#{attr[0]} = #{attr[1]}]"
      }
    }

    context
  end

  # Character data: traces the stripped text unless it is empty.
  def text(context, data)
    verbose(:xml) {
      content = data.strip
      spit "#{indent(level)}#{content}" unless content.empty?
    }

    context
  end

  # Element closed: moves one level up and traces the closing tag.
  def done(context, name)
    verbose(:xml) {
      step :up
      spit "#{indent(level)}</#{name}>"
    }

    context
  end

  # Only moves the trace one level up.
  def empty(context)
    verbose(:xml) { step :up }

    context
  end

  # Prints this spec and, recursively, its sub-specs.
  #
  # Specs that respond to +field+ print "[element] FIELD -> name" (through
  # +spit+ when available, else +warn+) and descend one level deeper. Other
  # specs descend at the same level, following their default spec when they
  # have no explicit sub-specs.
  def inspect_spec(element = nil, level = 0)
    if respond_to?(:field)
      msg = "#{indent(level)}[#{element}] #{field.to_s.upcase} -> #{name}"
      respond_to?(:spit) ? spit(msg) : warn(msg)

      specs.each { |e, s| s.inspect_spec(e, level + 1) }
    elsif specs.empty?
      specs.default.inspect_spec('?', level)
    else
      specs.each { |e, s| s.inspect_spec(e, level) }
    end
  end

  private

  # Reads the shared trace depth.
  def level
    BaseSpec.instance_variable_get(:@level)
  end

  # Moves the shared trace depth one step :down (+1) or :up (-1).
  def step(direction)
    delta = { :down => 1, :up => -1 }[direction]
    BaseSpec.instance_variable_set(:@level, level + delta)
  end

end
197
+
198
# Spec for the record element: opens a new Athena::Record when the element
# starts and closes it when the element is done.
class RecordSpec < BaseSpec

  attr_reader :parser
  attr_accessor :record

  def initialize(parser)
    super()

    @parser = parser
  end

  # Runs the BaseSpec handling, then starts a fresh record that is handed
  # the parser's block. Returns the new record.
  def start(context, name, attrs)
    super(context, name, attrs)

    self.record = Athena::Record.new(parser.block)
  end

  # Runs the BaseSpec handling, then closes the current record.
  def done(context, name)
    super(context, name)

    record.close
  end

end
222
+
223
# Spec for a configured element: looks up the record for its field when the
# element starts and feeds the element's text into that record.
class ElementSpec < BaseSpec

  attr_reader :name, :field, :config
  attr_accessor :record

  def initialize(name, field, config)
    super()

    @name, @field, @config = name, field, config
  end

  # Runs the BaseSpec handling, then fetches the record via
  # Athena::Record[field, config]. Returns that record.
  def start(context, name, attrs)
    super(context, name, attrs)

    self.record = Athena::Record[field, config]
  end

  # Runs the BaseSpec handling, then passes +data+ to the record under this
  # spec's own element name.
  def text(context, data)
    super(context, data)

    record.update(name, data)
  end

end
249
+
250
# Stand-in spec for elements nested below a configured element: it is its
# own default spec and forwards the listed readers and callbacks to the
# parent ElementSpec.
class SubElementSpec < BaseSpec

  extend Forwardable

  # Everything the parent exposes has to be listed explicitly here.
  def_delegators :@parent, :name, :field, :config, :record, :start, :text

  def initialize(parent)
    super()

    @parent = parent

    # Any deeper nesting resolves to this very spec again.
    default!(self)
  end

end
265
+
266
# Raised by build_spec when the configuration has no :__record_element.
class NoRecordElementError < StandardError; end

# Raised by build_spec when :__record_element is not a String.
class IllegalRecordElementError < StandardError; end
271
+
272
+ end
273
+
274
+ end
@@ -0,0 +1,88 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Formats
30
+
31
+ @formats = { :in => {}, :out => {} }
32
+
33
class << self

  # The registry shared by all formats: { :in => {...}, :out => {...} }.
  # Always read from Athena::Formats itself, so sub-classes see the same
  # hash.
  def formats
    Athena::Formats.instance_variable_get(:@formats)
  end

  # The class registered for +format+ in +direction+, or nil.
  def [](direction, format)
    formats[direction][format]
  end

  # Whether a class is registered for +format+ in +direction+.
  def valid_format?(direction, format)
    formats[direction].key?(format)
  end

  # Formats are not deferred unless a sub-class says otherwise.
  def deferred?
    false
  end

  # Placeholder; always raises NotImplementedError.
  def convert(*args)
    raise NotImplementedError, 'must be defined by sub-class'
  end

  private

  # Registers the receiving class for +format+ in +direction+.
  # Raises DuplicateFormatDefinitionError when the slot is already taken.
  def register_format(direction, format)
    registry = formats[direction]
    existing = registry[format]

    if existing
      raise DuplicateFormatDefinitionError,
        "format already defined (#{direction}): #{format} = #{existing}"
    end

    registry[format] = self
  end

  # Registers the receiving class under several format names at once.
  def register_formats(direction, *formats)
    formats.each { |format| register_format(direction, format) }
  end

end
73
+
74
# Placeholder for input formats; always raises NotImplementedError.
def parse(*args)
  raise NotImplementedError, 'must be defined by sub-class'
end
77
+
78
# Raised by register_format when a format name is already registered.
class DuplicateFormatDefinitionError < StandardError; end

# An ArgumentError for formats that receive the wrong number of arguments.
class FormatArgumentError < ArgumentError; end
83
+
84
+ end
85
+
86
# Load every format implementation found below the directory named after
# this file (<this file minus ".rb">/**/*.rb).
#
# The path is expanded first so that +require+ receives absolute paths even
# when __FILE__ is relative, and the list is sorted so that the load order
# does not depend on the file system.
Dir[File.expand_path(__FILE__).sub(/\.rb$/, '/**/*.rb')].sort.each { |rb|
  require rb
}
@@ -0,0 +1,90 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ class Athena::Parser
30
+
31
+ include Athena::Util
32
+
33
+ DEFAULT_SEPARATOR = ', '
34
+ DEFAULT_EMPTY = '<<EMPTY>>'
35
+
36
+ attr_reader :config, :spec
37
+ attr_accessor :block
38
+
39
# Normalizes +config+ (see build_config) and instantiates the input format
# registered under the name +spec+, handing it this parser.
def initialize(config, spec)
  @config = build_config(config)

  format = Athena::Formats[:in, spec]
  @spec  = format.new(self)
end
43
+
44
# Parses +source+ with the configured input format.
#
# The given block is stored in +block+ first, where the format picks it up
# (the formats pass <tt>parser.block</tt> on to the records they create).
# Returns Athena::Record.records.
def parse(source, &block)
  self.block = block
  spec.parse(source)

  Athena::Record.records
end
50
+
51
+ private
52
+
53
# Turns the user-supplied field configuration into a hash keyed by element.
#
# +config+ maps field names to one of:
#
# * a String or an Array of Strings: the element name(s) for the field;
# * a Hash with <tt>:elements</tt> (Array) or <tt>:element</tt> (single
#   name), plus optional <tt>:separator</tt>, <tt>:string</tt> and
#   <tt>:empty</tt>.
#
# Entries whose key starts with "__" are copied over unchanged.
#
# The result has the form
#
#   element => { field => { :string => ..., :empty => ..., :elements => [...] } }
#
# where <tt>:string</tt> defaults to one "%s" per element joined by the
# separator, and <tt>:empty</tt> defaults to DEFAULT_EMPTY.
#
# Raises ArgumentError for a value of any other type, and for a Hash that
# does not name at least one element.
def build_config(config)
  result = {}

  config.each { |field, v|
    if field.to_s =~ /^__/
      result[field] = v
      next
    end

    case v
    when String, Array
      elements = [*v]
      v = {}
    when Hash
      # Array() handles nil, a single name and a list alike; String#to_a
      # no longer exists on current Rubies.
      elements = v[:elements] || Array(v[:element])

      # An empty list would silently drop the field from the result.
      raise ArgumentError, "no elements specified for field #{field}" \
        unless elements.is_a?(Array) && !elements.empty?
    else
      raise ArgumentError, "illegal value for field #{field}"
    end

    separator = v[:separator] || DEFAULT_SEPARATOR

    # Identical for every element of the field, so computed only once.
    string = v[:string] || ['%s'] * elements.size * separator
    empty  = v[:empty]  || DEFAULT_EMPTY

    elements.each { |element|
      verbose(:config) do
        spit "#{field.to_s.upcase} -> #{element}"
      end

      (result[element] ||= {})[field] = {
        :string   => string,
        :empty    => empty,
        :elements => elements
      }
    }
  }

  result
end
89
+
90
+ end