athena 0.0.2.56 → 0.0.3.58

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.2
5
+ This documentation refers to athena version 0.0.3
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -17,7 +17,7 @@ TODO: well, the description... ;-)
17
17
 
18
18
  == LICENSE AND COPYRIGHT
19
19
 
20
- Copyright (C) 2007 University of Cologne,
20
+ Copyright (C) 2007-2008 University of Cologne,
21
21
  Albertus-Magnus-Platz, 50932 Cologne, Germany
22
22
 
23
23
  athena is free software: you can redistribute it and/or modify it under the
data/Rakefile CHANGED
@@ -1,21 +1,22 @@
1
+ $:.unshift('lib')
2
+ require 'athena'
3
+
1
4
  begin
2
5
  require 'hen'
6
+
7
+ Hen.lay! {{
8
+ :rubyforge => {
9
+ :package => 'athena'
10
+ },
11
+
12
+ :gem => {
13
+ :version => Athena::VERSION,
14
+ :summary => 'Convert database files to various formats.',
15
+ :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
16
+ :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
17
+ :dependencies => %w[xmlstreamin ruby-nuggets]
18
+ }
19
+ }}
3
20
  rescue LoadError
4
21
  abort "Please install the 'hen' gem first."
5
22
  end
6
-
7
- require 'lib/athena/version'
8
-
9
- Hen.lay! {{
10
- :rubyforge => {
11
- :package => 'athena'
12
- },
13
-
14
- :gem => {
15
- :version => Athena::VERSION,
16
- :summary => 'Convert database files to various formats.',
17
- :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
18
- :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
19
- :dependencies => %w[xmlstreamin ruby-nuggets]
20
- }
21
- }}
data/bin/athena CHANGED
@@ -5,9 +5,9 @@
5
5
  # #
6
6
  # athena -- Convert database files to various formats #
7
7
  # #
8
- # Copyright (C) 2007 University of Cologne, #
9
- # Albertus-Magnus-Platz, #
10
- # 50932 Cologne, Germany #
8
+ # Copyright (C) 2007-2008 University of Cologne, #
9
+ # Albertus-Magnus-Platz, #
10
+ # 50932 Cologne, Germany #
11
11
  # #
12
12
  # Authors: #
13
13
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -65,11 +65,11 @@ OptionParser.new { |opts|
65
65
  opts.on('-i', '--input FILE', "Input file [Default: STDIN]") { |f|
66
66
  abort "Can't find input file: #{f}." unless File.readable?(f)
67
67
 
68
- options[:input] = File.open(f, 'r')
68
+ options[:input] = File.directory?(f) ? Dir.open(f) : File.open(f, 'r')
69
69
 
70
70
  p = File.basename(f).split('.')
71
71
  options[:spec_fallback] = p.last.downcase
72
- options[:target_fallback] = p[0..-2].join('.')
72
+ options[:target_fallback] = p.size > 1 ? p[0..-2].join('.') : p.first
73
73
  }
74
74
 
75
75
  opts.on('-s', '--spec SPEC', "Input format (spec) [Default: file ending of <input-file>]") { |s|
data/lib/athena.rb CHANGED
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # athena -- Convert database files to various formats #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -35,6 +35,9 @@
35
35
  # class method _convert_ supplied. This way, a specific format can even function
36
36
  # as both input and output format.
37
37
 
38
+ module Athena
39
+ end
40
+
38
41
  require 'athena/util'
39
42
  require 'athena/parser'
40
43
  require 'athena/record'
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -26,65 +26,61 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- module Athena
29
+ class Athena::Formats
30
30
 
31
- class Formats
31
+ @formats = { :in => {}, :out => {} }
32
32
 
33
- @formats = { :in => {}, :out => {} }
33
+ class << self
34
34
 
35
- class << self
36
-
37
- def formats
38
- Formats.instance_variable_get(:@formats)
39
- end
40
-
41
- def [](direction, format)
42
- formats[direction][format]
43
- end
44
-
45
- def valid_format?(direction, format)
46
- formats[direction].has_key?(format)
47
- end
35
+ def formats
36
+ Athena::Formats.instance_variable_get(:@formats)
37
+ end
48
38
 
49
- def deferred?
50
- false
51
- end
39
+ def [](direction, format)
40
+ formats[direction][format]
41
+ end
52
42
 
53
- def convert(*args)
54
- raise NotImplementedError, 'must be defined by sub-class'
55
- end
43
+ def valid_format?(direction, format)
44
+ formats[direction].has_key?(format)
45
+ end
56
46
 
57
- private
47
+ def deferred?
48
+ false
49
+ end
58
50
 
59
- def register_format(direction, format)
60
- if existing = formats[direction][format]
61
- raise DuplicateFormatDefinitionError,
62
- "format already defined (#{direction}): #{format} = #{existing}"
63
- end
51
+ def convert(*args)
52
+ raise NotImplementedError, 'must be defined by sub-class'
53
+ end
64
54
 
65
- formats[direction][format] = self
66
- end
55
+ private
67
56
 
68
- def register_formats(direction, *formats)
69
- formats.each { |format|
70
- register_format(direction, format)
71
- }
57
+ def register_format(direction, format)
58
+ if existing = formats[direction][format]
59
+ raise DuplicateFormatDefinitionError,
60
+ "format already defined (#{direction}): #{format} = #{existing}"
72
61
  end
73
62
 
63
+ formats[direction][format] = self
74
64
  end
75
65
 
76
- def parse(*args)
77
- raise NotImplementedError, 'must be defined by sub-class'
66
+ def register_formats(direction, *formats)
67
+ formats.each { |format|
68
+ register_format(direction, format)
69
+ }
78
70
  end
79
71
 
80
- class DuplicateFormatDefinitionError < StandardError
81
- end
72
+ end
82
73
 
83
- class FormatArgumentError < ArgumentError
84
- end
74
+ def parse(*args)
75
+ raise NotImplementedError, 'must be defined by sub-class'
76
+ end
85
77
 
78
+ class DuplicateFormatDefinitionError < StandardError
86
79
  end
87
-
80
+
81
+ class FormatArgumentError < ArgumentError
82
+ end
83
+
88
84
  end
89
85
 
90
86
  Dir[__FILE__.sub(/\.rb$/, '/**/*.rb')].each { |rb|
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -28,41 +28,37 @@
28
28
 
29
29
  require 'iconv'
30
30
 
31
- module Athena
31
+ class Athena::Formats
32
32
 
33
- class Formats
33
+ class DBM < Athena::Formats
34
34
 
35
- class DBM < Athena::Formats
35
+ register_formats :out, 'dbm', 'midos'
36
36
 
37
- register_formats :out, 'dbm', 'midos'
37
+ CRLF = "\015\012"
38
38
 
39
- CRLF = "\015\012"
39
+ ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
40
40
 
41
- ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
41
+ VALUE_SEPARATOR = '|'
42
+ RECORD_SEPARATOR = '&&&'
42
43
 
43
- VALUE_SEPARATOR = '|'
44
- RECORD_SEPARATOR = '&&&'
44
+ def self.convert(record)
45
+ dbm = ["ID:#{record.id}"]
45
46
 
46
- def self.convert(record)
47
- dbm = ["ID:#{record.id}"]
47
+ record.struct.each { |field, struct|
48
+ strings = struct[:elements].inject([]) { |array, element|
49
+ values = (struct[:values][element] || []).map { |v|
50
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
51
+ }.reject { |v| v.empty? }
48
52
 
49
- record.struct.each { |field, struct|
50
- strings = struct[:elements].inject([]) { |array, element|
51
- values = (struct[:values][element] || []).map { |v|
52
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
53
- }.reject { |v| v.empty? }
54
-
55
- array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
56
- }
57
-
58
- dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
53
+ array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
59
54
  }
60
55
 
61
- dbm << RECORD_SEPARATOR
56
+ dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
57
+ }
62
58
 
63
- dbm.join(CRLF) << CRLF << CRLF
64
- end
59
+ dbm << RECORD_SEPARATOR
65
60
 
61
+ dbm.join(CRLF) << CRLF << CRLF
66
62
  end
67
63
 
68
64
  end
@@ -0,0 +1,91 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'rubygems'
30
+ require 'ferret'
31
+
32
+ class Athena::Formats
33
+
34
+ class Ferret < Athena::Formats
35
+
36
+ register_format :in, 'ferret'
37
+
38
+ attr_reader :record_element, :config, :parser, :match_all_query
39
+
40
+ def initialize(parser)
41
+ config = parser.config.dup
42
+
43
+ case @record_element = config.delete(:__record_element)
44
+ when String
45
+ # fine!
46
+ when nil
47
+ raise NoRecordElementError, 'no record element specified'
48
+ else
49
+ raise IllegalRecordElementError, "illegal record element #{@record_element}"
50
+ end
51
+
52
+ @config = config
53
+ @parser = parser
54
+
55
+ @match_all_query = ::Ferret::Search::MatchAllQuery.new
56
+ end
57
+
58
+ def parse(source)
59
+ search_all(source) { |doc|
60
+ record = Athena::Record.new(parser.block, doc[record_element])
61
+
62
+ config.each { |element, field_config|
63
+ record.update(element, doc[element], field_config)
64
+ }
65
+
66
+ record.close
67
+ }
68
+ end
69
+
70
+ private
71
+
72
+ def search_all(source)
73
+ index = ::Ferret::Index::Index.new(
74
+ :path => source.path,
75
+ :create_if_missing => false
76
+ ).searcher
77
+
78
+ index.search_each(match_all_query, :limit => :all) { |doc_id, _|
79
+ yield index[doc_id] if block_given?
80
+ }
81
+ end
82
+
83
+ class NoRecordElementError < StandardError
84
+ end
85
+
86
+ class IllegalRecordElementError < StandardError
87
+ end
88
+
89
+ end
90
+
91
+ end
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -29,113 +29,109 @@
29
29
  require 'iconv'
30
30
  require 'enumerator'
31
31
 
32
- module Athena
32
+ class Athena::Formats
33
33
 
34
- class Formats
34
+ module Lingo
35
35
 
36
- module Lingo
36
+ class Base < Athena::Formats
37
37
 
38
- class Base < Athena::Formats
38
+ class << self
39
39
 
40
- class << self
41
-
42
- def convert(record)
43
- record.struct.inject([]) { |terms, (field, struct)|
44
- terms << struct[:elements].inject([]) { |array, element|
45
- array += (struct[:values][element] || []).map { |v|
46
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
47
- }.reject { |v| v.empty? }
48
- }
40
+ def convert(record)
41
+ record.struct.inject([]) { |terms, (field, struct)|
42
+ terms << struct[:elements].inject([]) { |array, element|
43
+ array += (struct[:values][element] || []).map { |v|
44
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
45
+ }.reject { |v| v.empty? }
49
46
  }
50
- end
51
-
52
- def deferred?
53
- true
54
- end
47
+ }
48
+ end
55
49
 
56
- private
50
+ def deferred?
51
+ true
52
+ end
57
53
 
58
- def check_number_of_arguments(expected, actual, blow = false, &block)
59
- return true if block ? block[actual] : expected == actual
54
+ private
60
55
 
61
- msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
56
+ def check_number_of_arguments(expected, actual, blow = false, &block)
57
+ return true if block ? block[actual] : expected == actual
62
58
 
63
- if blow
64
- raise FormatArgumentError, msg
65
- else
66
- warn msg
67
- return false
68
- end
69
- end
59
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
70
60
 
71
- def check_number_of_arguments!(expected, actual, &block)
72
- check_number_of_arguments(expected, actual, true, &block)
61
+ if blow
62
+ raise FormatArgumentError, msg
63
+ else
64
+ warn msg
65
+ return false
73
66
  end
67
+ end
74
68
 
69
+ def check_number_of_arguments!(expected, actual, &block)
70
+ check_number_of_arguments(expected, actual, true, &block)
75
71
  end
76
72
 
77
73
  end
78
74
 
79
- # "Nasenbär\n"
80
- class SingleWord < Athena::Formats::Lingo::Base
75
+ end
81
76
 
82
- register_formats :out, 'lingo/single_word'
77
+ # "Nasenbär\n"
78
+ class SingleWord < Athena::Formats::Lingo::Base
83
79
 
84
- def self.convert(record)
85
- super.flatten
86
- end
80
+ register_formats :out, 'lingo/single_word'
87
81
 
82
+ def self.convert(record)
83
+ super.flatten
88
84
  end
89
85
 
90
- # "John Vorhauer*Vorhauer, John\n"
91
- class KeyValue < Athena::Formats::Lingo::Base
86
+ end
92
87
 
93
- register_formats :out, 'lingo/key_value'
88
+ # "John Vorhauer*Vorhauer, John\n"
89
+ class KeyValue < Athena::Formats::Lingo::Base
94
90
 
95
- def self.convert(record)
96
- super.map { |terms|
97
- next unless check_number_of_arguments(2, terms.size)
91
+ register_formats :out, 'lingo/key_value'
98
92
 
99
- terms.join('*')
100
- }.compact
101
- end
93
+ def self.convert(record)
94
+ super.map { |terms|
95
+ next unless check_number_of_arguments(2, terms.size)
102
96
 
97
+ terms.join('*')
98
+ }.compact
103
99
  end
104
100
 
105
- # "Essen,essen #v Essen #s Esse #s\n"
106
- class WordClass < Athena::Formats::Lingo::Base
101
+ end
107
102
 
108
- register_formats :out, 'lingo/word_class'
103
+ # "Essen,essen #v Essen #s Esse #s\n"
104
+ class WordClass < Athena::Formats::Lingo::Base
109
105
 
110
- def self.convert(record)
111
- super.map { |terms|
112
- next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
113
- actual > 1 && actual % 2 == 1
114
- }
106
+ register_formats :out, 'lingo/word_class'
115
107
 
116
- [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
117
- "#{form} ##{wc}"
118
- }.join(' ')].join(',')
119
- }.compact
120
- end
108
+ def self.convert(record)
109
+ super.map { |terms|
110
+ next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
111
+ actual > 1 && actual % 2 == 1
112
+ }
121
113
 
114
+ [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
115
+ "#{form} ##{wc}"
116
+ }.join(' ')].join(',')
117
+ }.compact
122
118
  end
123
119
 
124
- # "Fax;Faxkopie;Telefax\n"
125
- class MultiValue < Athena::Formats::Lingo::Base
120
+ end
126
121
 
127
- register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
122
+ # "Fax;Faxkopie;Telefax\n"
123
+ class MultiValue < Athena::Formats::Lingo::Base
128
124
 
129
- def self.convert(record)
130
- super.map { |terms|
131
- next unless check_number_of_arguments('> 1', terms.size) { |actual|
132
- actual > 1
133
- }
125
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
134
126
 
135
- terms.join(';')
136
- }.compact
137
- end
127
+ def self.convert(record)
128
+ super.map { |terms|
129
+ next unless check_number_of_arguments('> 1', terms.size) { |actual|
130
+ actual > 1
131
+ }
138
132
 
133
+ terms.join(';')
134
+ }.compact
139
135
  end
140
136
 
141
137
  end