athena 0.0.1.53 → 0.0.2.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/COPYING CHANGED
@@ -1,4 +1,4 @@
1
- = License for ruby-nuggets
1
+ = License for athena
2
2
 
3
3
  GNU GENERAL PUBLIC LICENSE
4
4
  Version 3, 29 June 2007
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.1
5
+ This documentation refers to athena version 0.0.2
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/Rakefile CHANGED
@@ -1,23 +1,21 @@
1
- require 'lib/athena/version'
1
+ begin
2
+ require 'hen'
3
+ rescue LoadError
4
+ abort "Please install the 'hen' gem first."
5
+ end
2
6
 
3
- FILES = FileList['lib/**/*.rb'].to_a
4
- EXECS = FileList['bin/*'].to_a
5
- RDOCS = %w[README COPYING ChangeLog]
6
- OTHER = FileList['[A-Z]*', 'example/*'].to_a
7
+ require 'lib/athena/version'
7
8
 
8
- task(:doc_spec) {{
9
- :title => 'athena Application documentation',
10
- :rdoc_files => RDOCS + FILES
11
- }}
9
+ Hen.lay! {{
10
+ :rubyforge => {
11
+ :package => 'athena'
12
+ },
12
13
 
13
- task(:gem_spec) {{
14
- :name => 'athena',
15
- :version => Athena::VERSION,
16
- :summary => 'Convert database files to various formats',
17
- :files => FILES + EXECS + OTHER,
18
- :require_path => 'lib',
19
- :bindir => 'bin',
20
- :executables => EXECS,
21
- :extra_rdoc_files => RDOCS,
22
- :dependencies => %w[xmlstreamin ruby-nuggets]
14
+ :gem => {
15
+ :version => Athena::VERSION,
16
+ :summary => 'Convert database files to various formats.',
17
+ :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
18
+ :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
19
+ :dependencies => %w[xmlstreamin ruby-nuggets]
20
+ }
23
21
  }}
data/bin/athena CHANGED
@@ -39,7 +39,7 @@ USAGE = "Usage: #{$0} [-h|--help] [options]"
39
39
  abort USAGE if ARGV.empty?
40
40
 
41
41
  # Global variable to handle verbosity
42
- $_VERBOSE = {}
42
+ $Verbose = {}
43
43
 
44
44
  options = {
45
45
  :config => 'config.yaml',
@@ -54,16 +54,16 @@ OptionParser.new { |opts|
54
54
  opts.separator ''
55
55
  opts.separator 'Options:'
56
56
 
57
- opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable? options[:config]}]") { |f|
58
- abort "Can't find config file: #{f}." unless File.readable? f
57
+ opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable?(options[:config])}]") { |f|
58
+ abort "Can't find config file: #{f}." unless File.readable?(f)
59
59
 
60
60
  options[:config] = f
61
61
  }
62
62
 
63
63
  opts.separator ''
64
64
 
65
- opts.on('-i', '--input FILE', "Input file [Default: #{options[:input] == STDIN ? 'STDIN' : options[:input]}]") { |f|
66
- abort "Can't find input file: #{f}." unless File.readable? f
65
+ opts.on('-i', '--input FILE', "Input file [Default: STDIN]") { |f|
66
+ abort "Can't find input file: #{f}." unless File.readable?(f)
67
67
 
68
68
  options[:input] = File.open(f, 'r')
69
69
 
@@ -78,8 +78,11 @@ OptionParser.new { |opts|
78
78
 
79
79
  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
80
80
  puts "Available input formats (specs):"
81
- Athena.input_formats.each { |s, k|
82
- puts " - #{s} = #{k}"
81
+
82
+ formats = Athena.input_formats
83
+ max = formats.map { |a, _| a.length }.max
84
+ formats.each { |f, k|
85
+ puts " - %-#{max}s = %s" % [f, k]
83
86
  }
84
87
 
85
88
  exit 0
@@ -87,7 +90,7 @@ OptionParser.new { |opts|
87
90
 
88
91
  opts.separator ''
89
92
 
90
- opts.on('-o', '--output FILE', "Output file [Default: #{options[:output] == STDOUT ? 'STDOUT' : options[:output]}]") { |f|
93
+ opts.on('-o', '--output FILE', "Output file [Default: STDOUT]") { |f|
91
94
  options[:output] = File.open(f, 'w')
92
95
 
93
96
  options[:format_fallback] = f.split('.').last.downcase
@@ -99,8 +102,11 @@ OptionParser.new { |opts|
99
102
 
100
103
  opts.on('-l', '--list-formats', "List available output formats and exit") {
101
104
  puts "Available output formats:"
102
- Athena.output_formats.each { |f, k|
103
- puts " - #{f} = #{k}"
105
+
106
+ formats = Athena.output_formats
107
+ max = formats.map { |a, _| a.length }.max
108
+ formats.each { |f, k|
109
+ puts " - %-#{max}s = %s" % [f, k]
104
110
  }
105
111
 
106
112
  exit 0
@@ -117,10 +123,10 @@ OptionParser.new { |opts|
117
123
 
118
124
  opts.on('-v', '--verbose [WHAT]', "Be verbose about what's being done. Optional argument is a comma-separated", "list of what should be output, or 'all' [Default: 'all']") { |what|
119
125
  if what.nil? || what == 'all'
120
- $_VERBOSE.default = true
126
+ $Verbose.default = true
121
127
  else
122
128
  what.split(',').each { |w|
123
- $_VERBOSE[w.to_sym] = true
129
+ $Verbose[w.to_sym] = true
124
130
  }
125
131
  end
126
132
  }
@@ -136,11 +142,11 @@ OptionParser.new { |opts|
136
142
 
137
143
  spec = options[:spec] || options[:spec_fallback]
138
144
  abort "No input format (spec) specified and none could be inferred." unless spec
139
- abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format? spec
145
+ abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format?(spec)
140
146
 
141
147
  format = options[:format] || options[:format_fallback]
142
148
  abort "No output format specified and none could be inferred." unless format
143
- abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format? format
149
+ abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format?(format)
144
150
 
145
151
  yaml = YAML.load_file(options[:config])
146
152
  if t = options[:target]
@@ -156,9 +162,21 @@ else
156
162
  end
157
163
  abort "Config not found for target: #{target}." unless config
158
164
 
159
- records = Athena.parser(config, spec).parse(options[:input]) { |record|
160
- options[:output].puts record.to(format)
161
- }
165
+ parser = Athena.parser(config, spec)
166
+
167
+ if Athena.deferred_output?(format)
168
+ records = parser.parse(options[:input])
169
+
170
+ records.map { |record|
171
+ record.to(format)
172
+ }.flatten.sort.uniq.each { |line|
173
+ options[:output].puts line
174
+ }
175
+ else
176
+ records = parser.parse(options[:input]) { |record|
177
+ options[:output].puts record.to(format)
178
+ }
179
+ end
162
180
 
163
181
  Athena::Util.verbose(:count) do
164
182
  spit records.size
@@ -65,4 +65,8 @@ module Athena
65
65
  Formats.valid_format?(:out, format)
66
66
  end
67
67
 
68
+ def deferred_output?(format)
69
+ Formats[:out, format].deferred?
70
+ end
71
+
68
72
  end
@@ -35,7 +35,7 @@ module Athena
35
35
  class << self
36
36
 
37
37
  def formats
38
- Formats.instance_variable_get :@formats
38
+ Formats.instance_variable_get(:@formats)
39
39
  end
40
40
 
41
41
  def [](direction, format)
@@ -43,7 +43,11 @@ module Athena
43
43
  end
44
44
 
45
45
  def valid_format?(direction, format)
46
- formats[direction].has_key? format
46
+ formats[direction].has_key?(format)
47
+ end
48
+
49
+ def deferred?
50
+ false
47
51
  end
48
52
 
49
53
  def convert(*args)
@@ -53,6 +57,11 @@ module Athena
53
57
  private
54
58
 
55
59
  def register_format(direction, format)
60
+ if existing = formats[direction][format]
61
+ raise DuplicateFormatDefinitionError,
62
+ "format already defined (#{direction}): #{format} = #{existing}"
63
+ end
64
+
56
65
  formats[direction][format] = self
57
66
  end
58
67
 
@@ -68,6 +77,12 @@ module Athena
68
77
  raise NotImplementedError, 'must be defined by sub-class'
69
78
  end
70
79
 
80
+ class DuplicateFormatDefinitionError < StandardError
81
+ end
82
+
83
+ class FormatArgumentError < ArgumentError
84
+ end
85
+
71
86
  end
72
87
 
73
88
  end
@@ -40,20 +40,25 @@ module Athena
40
40
 
41
41
  ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
42
42
 
43
+ VALUE_SEPARATOR = '|'
44
+ RECORD_SEPARATOR = '&&&'
45
+
43
46
  def self.convert(record)
44
47
  dbm = ["ID:#{record.id}"]
48
+
45
49
  record.struct.each { |field, struct|
46
50
  strings = struct[:elements].inject([]) { |array, element|
47
51
  values = (struct[:values][element] || []).map { |v|
48
52
  (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
49
53
  }.reject { |v| v.empty? }
50
54
 
51
- array << (values.empty? ? struct[:empty] : values.join('|'))
55
+ array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
52
56
  }
53
57
 
54
58
  dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
55
59
  }
56
- dbm << '&&&'
60
+
61
+ dbm << RECORD_SEPARATOR
57
62
 
58
63
  dbm.join(CRLF) << CRLF << CRLF
59
64
  end
@@ -0,0 +1,145 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'iconv'
30
+ require 'enumerator'
31
+
32
+ module Athena
33
+
34
+ class Formats
35
+
36
+ module Lingo
37
+
38
+ class Base < Athena::Formats
39
+
40
+ class << self
41
+
42
+ def convert(record)
43
+ record.struct.inject([]) { |terms, (field, struct)|
44
+ terms << struct[:elements].inject([]) { |array, element|
45
+ array += (struct[:values][element] || []).map { |v|
46
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
47
+ }.reject { |v| v.empty? }
48
+ }
49
+ }
50
+ end
51
+
52
+ def deferred?
53
+ true
54
+ end
55
+
56
+ private
57
+
58
+ def check_number_of_arguments(expected, actual, blow = false, &block)
59
+ return true if block ? block[actual] : expected == actual
60
+
61
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
62
+
63
+ if blow
64
+ raise FormatArgumentError, msg
65
+ else
66
+ warn msg
67
+ return false
68
+ end
69
+ end
70
+
71
+ def check_number_of_arguments!(expected, actual, &block)
72
+ check_number_of_arguments(expected, actual, true, &block)
73
+ end
74
+
75
+ end
76
+
77
+ end
78
+
79
+ # "Nasenbär\n"
80
+ class SingleWord < Athena::Formats::Lingo::Base
81
+
82
+ register_formats :out, 'lingo/single_word'
83
+
84
+ def self.convert(record)
85
+ super.flatten
86
+ end
87
+
88
+ end
89
+
90
+ # "John Vorhauer*Vorhauer, John\n"
91
+ class KeyValue < Athena::Formats::Lingo::Base
92
+
93
+ register_formats :out, 'lingo/key_value'
94
+
95
+ def self.convert(record)
96
+ super.map { |terms|
97
+ next unless check_number_of_arguments(2, terms.size)
98
+
99
+ terms.join('*')
100
+ }.compact
101
+ end
102
+
103
+ end
104
+
105
+ # "Essen,essen #v Essen #s Esse #s\n"
106
+ class WordClass < Athena::Formats::Lingo::Base
107
+
108
+ register_formats :out, 'lingo/word_class'
109
+
110
+ def self.convert(record)
111
+ super.map { |terms|
112
+ next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
113
+ actual > 1 && actual % 2 == 1
114
+ }
115
+
116
+ [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
117
+ "#{form} ##{wc}"
118
+ }.join(' ')].join(',')
119
+ }.compact
120
+ end
121
+
122
+ end
123
+
124
+ # "Fax;Faxkopie;Telefax\n"
125
+ class MultiValue < Athena::Formats::Lingo::Base
126
+
127
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
128
+
129
+ def self.convert(record)
130
+ super.map { |terms|
131
+ next unless check_number_of_arguments('> 1', terms.size) { |actual|
132
+ actual > 1
133
+ }
134
+
135
+ terms.join(';')
136
+ }.compact
137
+ end
138
+
139
+ end
140
+
141
+ end
142
+
143
+ end
144
+
145
+ end
@@ -187,12 +187,12 @@ module Athena
187
187
  private
188
188
 
189
189
  def level
190
- BaseSpec.instance_variable_get :@level
190
+ BaseSpec.instance_variable_get(:@level)
191
191
  end
192
192
 
193
193
  def step(direction)
194
194
  steps = { :down => 1, :up => -1 }
195
- BaseSpec.instance_variable_set :@level, level + steps[direction]
195
+ BaseSpec.instance_variable_set(:@level, level + steps[direction])
196
196
  end
197
197
 
198
198
  end
@@ -244,7 +244,7 @@ module Athena
244
244
  def text(context, data)
245
245
  super
246
246
 
247
- record.update name, data
247
+ record.update(name, data)
248
248
  end
249
249
 
250
250
  end
@@ -260,7 +260,7 @@ module Athena
260
260
  super()
261
261
 
262
262
  @parent = parent
263
- default! self
263
+ default!(self)
264
264
  end
265
265
 
266
266
  end
@@ -32,6 +32,9 @@ module Athena
32
32
 
33
33
  include Util
34
34
 
35
+ DEFAULT_SEPARATOR = ', '
36
+ DEFAULT_EMPTY = '<<EMPTY>>'
37
+
35
38
  attr_reader :config, :spec
36
39
  attr_accessor :block
37
40
 
@@ -61,12 +64,13 @@ module Athena
61
64
  when Hash
62
65
  elements = v[:elements] || v[:element].to_a
63
66
 
64
- raise ArgumentError, "no elements specified for field #{field}" unless elements.is_a?(Array)
67
+ raise ArgumentError, "no elements specified for field #{field}" \
68
+ unless elements.is_a?(Array)
65
69
  else
66
70
  raise ArgumentError, "illegal value for field #{field}"
67
71
  end
68
72
 
69
- separator = v[:separator] || ', '
73
+ separator = v[:separator] || DEFAULT_SEPARATOR
70
74
 
71
75
  elements.each { |element|
72
76
  verbose(:config) do
@@ -75,7 +79,7 @@ module Athena
75
79
 
76
80
  (hash[element] ||= {})[field] = {
77
81
  :string => v[:string] || ['%s'] * elements.size * separator,
78
- :empty => v[:empty] || '<<EMPTY>>',
82
+ :empty => v[:empty] || DEFAULT_EMPTY,
79
83
  :elements => elements
80
84
  }
81
85
  }
@@ -53,11 +53,11 @@ module Athena
53
53
  attr_reader :struct, :block, :id
54
54
 
55
55
  def initialize(block, id = object_id.abs)
56
- self.class.records << self
57
-
58
56
  @struct = {}
59
57
  @block = block
60
58
  @id = id
59
+
60
+ add_record
61
61
  end
62
62
 
63
63
  def fill(field, config)
@@ -89,6 +89,12 @@ module Athena
89
89
  Athena::Formats[:out, format].convert(self)
90
90
  end
91
91
 
92
+ private
93
+
94
+ def add_record
95
+ self.class.records << self
96
+ end
97
+
92
98
  class NoRecordError < StandardError
93
99
  end
94
100
 
@@ -33,7 +33,7 @@ module Athena
33
33
  extend self
34
34
 
35
35
  def verbose(what, klass = self.class, &block)
36
- if $_VERBOSE[what]
36
+ if $Verbose[what]
37
37
  klass.send(:define_method, :spit) { |msg|
38
38
  warn "*#{what}: #{msg}"
39
39
  }
@@ -32,7 +32,7 @@ module Athena
32
32
 
33
33
  MAJOR = 0
34
34
  MINOR = 0
35
- TINY = 1
35
+ TINY = 2
36
36
 
37
37
  class << self
38
38
 
metadata CHANGED
@@ -1,43 +1,56 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
4
2
  name: athena
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.0.1.53
7
- date: 2007-10-11 00:00:00 +02:00
8
- summary: Convert database files to various formats
9
- require_paths:
10
- - lib
11
- email: jens.wille@uni-koeln.de
12
- homepage:
13
- rubyforge_project:
14
- description:
15
- autorequire:
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.0.2.56
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Jens Wille
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-01-08 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: xmlstreamin
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: ruby-nuggets
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0"
32
+ version:
33
+ description: Convert database files to various formats.
34
+ email: jens.wille@uni-koeln.de
35
+ executables:
36
+ - athena
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - COPYING
42
+ - ChangeLog
31
43
  files:
32
- - lib/athena.rb
33
44
  - lib/athena/formats.rb
34
45
  - lib/athena/version.rb
35
46
  - lib/athena/util.rb
36
- - lib/athena/record.rb
37
- - lib/athena/parser.rb
38
47
  - lib/athena/formats/sisis.rb
39
48
  - lib/athena/formats/xml.rb
40
49
  - lib/athena/formats/dbm.rb
50
+ - lib/athena/formats/lingo.rb
51
+ - lib/athena/record.rb
52
+ - lib/athena/parser.rb
53
+ - lib/athena.rb
41
54
  - bin/athena
42
55
  - COPYING
43
56
  - README
@@ -46,36 +59,39 @@ files:
46
59
  - example/sisis-ex.txt
47
60
  - example/config.yaml
48
61
  - example/example.xml
49
- test_files: []
50
-
51
- rdoc_options: []
52
-
53
- extra_rdoc_files:
62
+ has_rdoc: true
63
+ homepage: http://prometheus.rubyforge.org/athena
64
+ post_install_message:
65
+ rdoc_options:
66
+ - --all
67
+ - --inline-source
68
+ - --charset
69
+ - UTF-8
70
+ - --main
54
71
  - README
55
- - COPYING
56
- - ChangeLog
57
- executables:
58
- - athena
59
- extensions: []
60
-
72
+ - --title
73
+ - athena Application documentation
74
+ - --line-numbers
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
82
+ version:
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: "0"
88
+ version:
61
89
  requirements: []
62
90
 
63
- dependencies:
64
- - !ruby/object:Gem::Dependency
65
- name: xmlstreamin
66
- version_requirement:
67
- version_requirements: !ruby/object:Gem::Version::Requirement
68
- requirements:
69
- - - ">"
70
- - !ruby/object:Gem::Version
71
- version: 0.0.0
72
- version:
73
- - !ruby/object:Gem::Dependency
74
- name: ruby-nuggets
75
- version_requirement:
76
- version_requirements: !ruby/object:Gem::Version::Requirement
77
- requirements:
78
- - - ">"
79
- - !ruby/object:Gem::Version
80
- version: 0.0.0
81
- version:
91
+ rubyforge_project: prometheus
92
+ rubygems_version: 1.0.1
93
+ signing_key:
94
+ specification_version: 2
95
+ summary: Convert database files to various formats.
96
+ test_files: []
97
+