athena 0.0.2.56 → 0.0.3.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.2
5
+ This documentation refers to athena version 0.0.3
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -17,7 +17,7 @@ TODO: well, the description... ;-)
17
17
 
18
18
  == LICENSE AND COPYRIGHT
19
19
 
20
- Copyright (C) 2007 University of Cologne,
20
+ Copyright (C) 2007-2008 University of Cologne,
21
21
  Albertus-Magnus-Platz, 50932 Cologne, Germany
22
22
 
23
23
  athena is free software: you can redistribute it and/or modify it under the
data/Rakefile CHANGED
@@ -1,21 +1,22 @@
1
+ $:.unshift('lib')
2
+ require 'athena'
3
+
1
4
  begin
2
5
  require 'hen'
6
+
7
+ Hen.lay! {{
8
+ :rubyforge => {
9
+ :package => 'athena'
10
+ },
11
+
12
+ :gem => {
13
+ :version => Athena::VERSION,
14
+ :summary => 'Convert database files to various formats.',
15
+ :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
16
+ :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
17
+ :dependencies => %w[xmlstreamin ruby-nuggets]
18
+ }
19
+ }}
3
20
  rescue LoadError
4
21
  abort "Please install the 'hen' gem first."
5
22
  end
6
-
7
- require 'lib/athena/version'
8
-
9
- Hen.lay! {{
10
- :rubyforge => {
11
- :package => 'athena'
12
- },
13
-
14
- :gem => {
15
- :version => Athena::VERSION,
16
- :summary => 'Convert database files to various formats.',
17
- :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
18
- :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
19
- :dependencies => %w[xmlstreamin ruby-nuggets]
20
- }
21
- }}
data/bin/athena CHANGED
@@ -5,9 +5,9 @@
5
5
  # #
6
6
  # athena -- Convert database files to various formats #
7
7
  # #
8
- # Copyright (C) 2007 University of Cologne, #
9
- # Albertus-Magnus-Platz, #
10
- # 50932 Cologne, Germany #
8
+ # Copyright (C) 2007-2008 University of Cologne, #
9
+ # Albertus-Magnus-Platz, #
10
+ # 50932 Cologne, Germany #
11
11
  # #
12
12
  # Authors: #
13
13
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -65,11 +65,11 @@ OptionParser.new { |opts|
65
65
  opts.on('-i', '--input FILE', "Input file [Default: STDIN]") { |f|
66
66
  abort "Can't find input file: #{f}." unless File.readable?(f)
67
67
 
68
- options[:input] = File.open(f, 'r')
68
+ options[:input] = File.directory?(f) ? Dir.open(f) : File.open(f, 'r')
69
69
 
70
70
  p = File.basename(f).split('.')
71
71
  options[:spec_fallback] = p.last.downcase
72
- options[:target_fallback] = p[0..-2].join('.')
72
+ options[:target_fallback] = p.size > 1 ? p[0..-2].join('.') : p.first
73
73
  }
74
74
 
75
75
  opts.on('-s', '--spec SPEC', "Input format (spec) [Default: file ending of <input-file>]") { |s|
data/lib/athena.rb CHANGED
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # athena -- Convert database files to various formats #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -35,6 +35,9 @@
35
35
  # class method _convert_ supplied. This way, a specific format can even function
36
36
  # as both input and output format.
37
37
 
38
+ module Athena
39
+ end
40
+
38
41
  require 'athena/util'
39
42
  require 'athena/parser'
40
43
  require 'athena/record'
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -26,65 +26,61 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- module Athena
29
+ class Athena::Formats
30
30
 
31
- class Formats
31
+ @formats = { :in => {}, :out => {} }
32
32
 
33
- @formats = { :in => {}, :out => {} }
33
+ class << self
34
34
 
35
- class << self
36
-
37
- def formats
38
- Formats.instance_variable_get(:@formats)
39
- end
40
-
41
- def [](direction, format)
42
- formats[direction][format]
43
- end
44
-
45
- def valid_format?(direction, format)
46
- formats[direction].has_key?(format)
47
- end
35
+ def formats
36
+ Athena::Formats.instance_variable_get(:@formats)
37
+ end
48
38
 
49
- def deferred?
50
- false
51
- end
39
+ def [](direction, format)
40
+ formats[direction][format]
41
+ end
52
42
 
53
- def convert(*args)
54
- raise NotImplementedError, 'must be defined by sub-class'
55
- end
43
+ def valid_format?(direction, format)
44
+ formats[direction].has_key?(format)
45
+ end
56
46
 
57
- private
47
+ def deferred?
48
+ false
49
+ end
58
50
 
59
- def register_format(direction, format)
60
- if existing = formats[direction][format]
61
- raise DuplicateFormatDefinitionError,
62
- "format already defined (#{direction}): #{format} = #{existing}"
63
- end
51
+ def convert(*args)
52
+ raise NotImplementedError, 'must be defined by sub-class'
53
+ end
64
54
 
65
- formats[direction][format] = self
66
- end
55
+ private
67
56
 
68
- def register_formats(direction, *formats)
69
- formats.each { |format|
70
- register_format(direction, format)
71
- }
57
+ def register_format(direction, format)
58
+ if existing = formats[direction][format]
59
+ raise DuplicateFormatDefinitionError,
60
+ "format already defined (#{direction}): #{format} = #{existing}"
72
61
  end
73
62
 
63
+ formats[direction][format] = self
74
64
  end
75
65
 
76
- def parse(*args)
77
- raise NotImplementedError, 'must be defined by sub-class'
66
+ def register_formats(direction, *formats)
67
+ formats.each { |format|
68
+ register_format(direction, format)
69
+ }
78
70
  end
79
71
 
80
- class DuplicateFormatDefinitionError < StandardError
81
- end
72
+ end
82
73
 
83
- class FormatArgumentError < ArgumentError
84
- end
74
+ def parse(*args)
75
+ raise NotImplementedError, 'must be defined by sub-class'
76
+ end
85
77
 
78
+ class DuplicateFormatDefinitionError < StandardError
86
79
  end
87
-
80
+
81
+ class FormatArgumentError < ArgumentError
82
+ end
83
+
88
84
  end
89
85
 
90
86
  Dir[__FILE__.sub(/\.rb$/, '/**/*.rb')].each { |rb|
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -28,41 +28,37 @@
28
28
 
29
29
  require 'iconv'
30
30
 
31
- module Athena
31
+ class Athena::Formats
32
32
 
33
- class Formats
33
+ class DBM < Athena::Formats
34
34
 
35
- class DBM < Athena::Formats
35
+ register_formats :out, 'dbm', 'midos'
36
36
 
37
- register_formats :out, 'dbm', 'midos'
37
+ CRLF = "\015\012"
38
38
 
39
- CRLF = "\015\012"
39
+ ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
40
40
 
41
- ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
41
+ VALUE_SEPARATOR = '|'
42
+ RECORD_SEPARATOR = '&&&'
42
43
 
43
- VALUE_SEPARATOR = '|'
44
- RECORD_SEPARATOR = '&&&'
44
+ def self.convert(record)
45
+ dbm = ["ID:#{record.id}"]
45
46
 
46
- def self.convert(record)
47
- dbm = ["ID:#{record.id}"]
47
+ record.struct.each { |field, struct|
48
+ strings = struct[:elements].inject([]) { |array, element|
49
+ values = (struct[:values][element] || []).map { |v|
50
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
51
+ }.reject { |v| v.empty? }
48
52
 
49
- record.struct.each { |field, struct|
50
- strings = struct[:elements].inject([]) { |array, element|
51
- values = (struct[:values][element] || []).map { |v|
52
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
53
- }.reject { |v| v.empty? }
54
-
55
- array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
56
- }
57
-
58
- dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
53
+ array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
59
54
  }
60
55
 
61
- dbm << RECORD_SEPARATOR
56
+ dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
57
+ }
62
58
 
63
- dbm.join(CRLF) << CRLF << CRLF
64
- end
59
+ dbm << RECORD_SEPARATOR
65
60
 
61
+ dbm.join(CRLF) << CRLF << CRLF
66
62
  end
67
63
 
68
64
  end
@@ -0,0 +1,91 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'rubygems'
30
+ require 'ferret'
31
+
32
+ class Athena::Formats
33
+
34
+ class Ferret < Athena::Formats
35
+
36
+ register_format :in, 'ferret'
37
+
38
+ attr_reader :record_element, :config, :parser, :match_all_query
39
+
40
+ def initialize(parser)
41
+ config = parser.config.dup
42
+
43
+ case @record_element = config.delete(:__record_element)
44
+ when String
45
+ # fine!
46
+ when nil
47
+ raise NoRecordElementError, 'no record element specified'
48
+ else
49
+ raise IllegalRecordElementError, "illegal record element #{@record_element}"
50
+ end
51
+
52
+ @config = config
53
+ @parser = parser
54
+
55
+ @match_all_query = ::Ferret::Search::MatchAllQuery.new
56
+ end
57
+
58
+ def parse(source)
59
+ search_all(source) { |doc|
60
+ record = Athena::Record.new(parser.block, doc[record_element])
61
+
62
+ config.each { |element, field_config|
63
+ record.update(element, doc[element], field_config)
64
+ }
65
+
66
+ record.close
67
+ }
68
+ end
69
+
70
+ private
71
+
72
+ def search_all(source)
73
+ index = ::Ferret::Index::Index.new(
74
+ :path => source.path,
75
+ :create_if_missing => false
76
+ ).searcher
77
+
78
+ index.search_each(match_all_query, :limit => :all) { |doc_id, _|
79
+ yield index[doc_id] if block_given?
80
+ }
81
+ end
82
+
83
+ class NoRecordElementError < StandardError
84
+ end
85
+
86
+ class IllegalRecordElementError < StandardError
87
+ end
88
+
89
+ end
90
+
91
+ end
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -29,113 +29,109 @@
29
29
  require 'iconv'
30
30
  require 'enumerator'
31
31
 
32
- module Athena
32
+ class Athena::Formats
33
33
 
34
- class Formats
34
+ module Lingo
35
35
 
36
- module Lingo
36
+ class Base < Athena::Formats
37
37
 
38
- class Base < Athena::Formats
38
+ class << self
39
39
 
40
- class << self
41
-
42
- def convert(record)
43
- record.struct.inject([]) { |terms, (field, struct)|
44
- terms << struct[:elements].inject([]) { |array, element|
45
- array += (struct[:values][element] || []).map { |v|
46
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
47
- }.reject { |v| v.empty? }
48
- }
40
+ def convert(record)
41
+ record.struct.inject([]) { |terms, (field, struct)|
42
+ terms << struct[:elements].inject([]) { |array, element|
43
+ array += (struct[:values][element] || []).map { |v|
44
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
45
+ }.reject { |v| v.empty? }
49
46
  }
50
- end
51
-
52
- def deferred?
53
- true
54
- end
47
+ }
48
+ end
55
49
 
56
- private
50
+ def deferred?
51
+ true
52
+ end
57
53
 
58
- def check_number_of_arguments(expected, actual, blow = false, &block)
59
- return true if block ? block[actual] : expected == actual
54
+ private
60
55
 
61
- msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
56
+ def check_number_of_arguments(expected, actual, blow = false, &block)
57
+ return true if block ? block[actual] : expected == actual
62
58
 
63
- if blow
64
- raise FormatArgumentError, msg
65
- else
66
- warn msg
67
- return false
68
- end
69
- end
59
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
70
60
 
71
- def check_number_of_arguments!(expected, actual, &block)
72
- check_number_of_arguments(expected, actual, true, &block)
61
+ if blow
62
+ raise FormatArgumentError, msg
63
+ else
64
+ warn msg
65
+ return false
73
66
  end
67
+ end
74
68
 
69
+ def check_number_of_arguments!(expected, actual, &block)
70
+ check_number_of_arguments(expected, actual, true, &block)
75
71
  end
76
72
 
77
73
  end
78
74
 
79
- # "Nasenbär\n"
80
- class SingleWord < Athena::Formats::Lingo::Base
75
+ end
81
76
 
82
- register_formats :out, 'lingo/single_word'
77
+ # "Nasenbär\n"
78
+ class SingleWord < Athena::Formats::Lingo::Base
83
79
 
84
- def self.convert(record)
85
- super.flatten
86
- end
80
+ register_formats :out, 'lingo/single_word'
87
81
 
82
+ def self.convert(record)
83
+ super.flatten
88
84
  end
89
85
 
90
- # "John Vorhauer*Vorhauer, John\n"
91
- class KeyValue < Athena::Formats::Lingo::Base
86
+ end
92
87
 
93
- register_formats :out, 'lingo/key_value'
88
+ # "John Vorhauer*Vorhauer, John\n"
89
+ class KeyValue < Athena::Formats::Lingo::Base
94
90
 
95
- def self.convert(record)
96
- super.map { |terms|
97
- next unless check_number_of_arguments(2, terms.size)
91
+ register_formats :out, 'lingo/key_value'
98
92
 
99
- terms.join('*')
100
- }.compact
101
- end
93
+ def self.convert(record)
94
+ super.map { |terms|
95
+ next unless check_number_of_arguments(2, terms.size)
102
96
 
97
+ terms.join('*')
98
+ }.compact
103
99
  end
104
100
 
105
- # "Essen,essen #v Essen #s Esse #s\n"
106
- class WordClass < Athena::Formats::Lingo::Base
101
+ end
107
102
 
108
- register_formats :out, 'lingo/word_class'
103
+ # "Essen,essen #v Essen #s Esse #s\n"
104
+ class WordClass < Athena::Formats::Lingo::Base
109
105
 
110
- def self.convert(record)
111
- super.map { |terms|
112
- next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
113
- actual > 1 && actual % 2 == 1
114
- }
106
+ register_formats :out, 'lingo/word_class'
115
107
 
116
- [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
117
- "#{form} ##{wc}"
118
- }.join(' ')].join(',')
119
- }.compact
120
- end
108
+ def self.convert(record)
109
+ super.map { |terms|
110
+ next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
111
+ actual > 1 && actual % 2 == 1
112
+ }
121
113
 
114
+ [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
115
+ "#{form} ##{wc}"
116
+ }.join(' ')].join(',')
117
+ }.compact
122
118
  end
123
119
 
124
- # "Fax;Faxkopie;Telefax\n"
125
- class MultiValue < Athena::Formats::Lingo::Base
120
+ end
126
121
 
127
- register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
122
+ # "Fax;Faxkopie;Telefax\n"
123
+ class MultiValue < Athena::Formats::Lingo::Base
128
124
 
129
- def self.convert(record)
130
- super.map { |terms|
131
- next unless check_number_of_arguments('> 1', terms.size) { |actual|
132
- actual > 1
133
- }
125
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
134
126
 
135
- terms.join(';')
136
- }.compact
137
- end
127
+ def self.convert(record)
128
+ super.map { |terms|
129
+ next unless check_number_of_arguments('> 1', terms.size) { |actual|
130
+ actual > 1
131
+ }
138
132
 
133
+ terms.join(';')
134
+ }.compact
139
135
  end
140
136
 
141
137
  end