athena 0.0.1.53 → 0.0.2.56

Sign up to get free protection for your applications and to get access to all the features.
data/COPYING CHANGED
@@ -1,4 +1,4 @@
1
- = License for ruby-nuggets
1
+ = License for athena
2
2
 
3
3
  GNU GENERAL PUBLIC LICENSE
4
4
  Version 3, 29 June 2007
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.1
5
+ This documentation refers to athena version 0.0.2
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/Rakefile CHANGED
@@ -1,23 +1,21 @@
1
- require 'lib/athena/version'
1
+ begin
2
+ require 'hen'
3
+ rescue LoadError
4
+ abort "Please install the 'hen' gem first."
5
+ end
2
6
 
3
- FILES = FileList['lib/**/*.rb'].to_a
4
- EXECS = FileList['bin/*'].to_a
5
- RDOCS = %w[README COPYING ChangeLog]
6
- OTHER = FileList['[A-Z]*', 'example/*'].to_a
7
+ require 'lib/athena/version'
7
8
 
8
- task(:doc_spec) {{
9
- :title => 'athena Application documentation',
10
- :rdoc_files => RDOCS + FILES
11
- }}
9
+ Hen.lay! {{
10
+ :rubyforge => {
11
+ :package => 'athena'
12
+ },
12
13
 
13
- task(:gem_spec) {{
14
- :name => 'athena',
15
- :version => Athena::VERSION,
16
- :summary => 'Convert database files to various formats',
17
- :files => FILES + EXECS + OTHER,
18
- :require_path => 'lib',
19
- :bindir => 'bin',
20
- :executables => EXECS,
21
- :extra_rdoc_files => RDOCS,
22
- :dependencies => %w[xmlstreamin ruby-nuggets]
14
+ :gem => {
15
+ :version => Athena::VERSION,
16
+ :summary => 'Convert database files to various formats.',
17
+ :files => FileList['lib/**/*.rb', 'bin/*'].to_a,
18
+ :extra_files => FileList['[A-Z]*', 'example/*'].to_a,
19
+ :dependencies => %w[xmlstreamin ruby-nuggets]
20
+ }
23
21
  }}
data/bin/athena CHANGED
@@ -39,7 +39,7 @@ USAGE = "Usage: #{$0} [-h|--help] [options]"
39
39
  abort USAGE if ARGV.empty?
40
40
 
41
41
  # Global variable to handle verbosity
42
- $_VERBOSE = {}
42
+ $Verbose = {}
43
43
 
44
44
  options = {
45
45
  :config => 'config.yaml',
@@ -54,16 +54,16 @@ OptionParser.new { |opts|
54
54
  opts.separator ''
55
55
  opts.separator 'Options:'
56
56
 
57
- opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable? options[:config]}]") { |f|
58
- abort "Can't find config file: #{f}." unless File.readable? f
57
+ opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable?(options[:config])}]") { |f|
58
+ abort "Can't find config file: #{f}." unless File.readable?(f)
59
59
 
60
60
  options[:config] = f
61
61
  }
62
62
 
63
63
  opts.separator ''
64
64
 
65
- opts.on('-i', '--input FILE', "Input file [Default: #{options[:input] == STDIN ? 'STDIN' : options[:input]}]") { |f|
66
- abort "Can't find input file: #{f}." unless File.readable? f
65
+ opts.on('-i', '--input FILE', "Input file [Default: STDIN]") { |f|
66
+ abort "Can't find input file: #{f}." unless File.readable?(f)
67
67
 
68
68
  options[:input] = File.open(f, 'r')
69
69
 
@@ -78,8 +78,11 @@ OptionParser.new { |opts|
78
78
 
79
79
  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
80
80
  puts "Available input formats (specs):"
81
- Athena.input_formats.each { |s, k|
82
- puts " - #{s} = #{k}"
81
+
82
+ formats = Athena.input_formats
83
+ max = formats.map { |a, _| a.length }.max
84
+ formats.each { |f, k|
85
+ puts " - %-#{max}s = %s" % [f, k]
83
86
  }
84
87
 
85
88
  exit 0
@@ -87,7 +90,7 @@ OptionParser.new { |opts|
87
90
 
88
91
  opts.separator ''
89
92
 
90
- opts.on('-o', '--output FILE', "Output file [Default: #{options[:output] == STDOUT ? 'STDOUT' : options[:output]}]") { |f|
93
+ opts.on('-o', '--output FILE', "Output file [Default: STDOUT]") { |f|
91
94
  options[:output] = File.open(f, 'w')
92
95
 
93
96
  options[:format_fallback] = f.split('.').last.downcase
@@ -99,8 +102,11 @@ OptionParser.new { |opts|
99
102
 
100
103
  opts.on('-l', '--list-formats', "List available output formats and exit") {
101
104
  puts "Available output formats:"
102
- Athena.output_formats.each { |f, k|
103
- puts " - #{f} = #{k}"
105
+
106
+ formats = Athena.output_formats
107
+ max = formats.map { |a, _| a.length }.max
108
+ formats.each { |f, k|
109
+ puts " - %-#{max}s = %s" % [f, k]
104
110
  }
105
111
 
106
112
  exit 0
@@ -117,10 +123,10 @@ OptionParser.new { |opts|
117
123
 
118
124
  opts.on('-v', '--verbose [WHAT]', "Be verbose about what's being done. Optional argument is a comma-separated", "list of what should be output, or 'all' [Default: 'all']") { |what|
119
125
  if what.nil? || what == 'all'
120
- $_VERBOSE.default = true
126
+ $Verbose.default = true
121
127
  else
122
128
  what.split(',').each { |w|
123
- $_VERBOSE[w.to_sym] = true
129
+ $Verbose[w.to_sym] = true
124
130
  }
125
131
  end
126
132
  }
@@ -136,11 +142,11 @@ OptionParser.new { |opts|
136
142
 
137
143
  spec = options[:spec] || options[:spec_fallback]
138
144
  abort "No input format (spec) specified and none could be inferred." unless spec
139
- abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format? spec
145
+ abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format?(spec)
140
146
 
141
147
  format = options[:format] || options[:format_fallback]
142
148
  abort "No output format specified and none could be inferred." unless format
143
- abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format? format
149
+ abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format?(format)
144
150
 
145
151
  yaml = YAML.load_file(options[:config])
146
152
  if t = options[:target]
@@ -156,9 +162,21 @@ else
156
162
  end
157
163
  abort "Config not found for target: #{target}." unless config
158
164
 
159
- records = Athena.parser(config, spec).parse(options[:input]) { |record|
160
- options[:output].puts record.to(format)
161
- }
165
+ parser = Athena.parser(config, spec)
166
+
167
+ if Athena.deferred_output?(format)
168
+ records = parser.parse(options[:input])
169
+
170
+ records.map { |record|
171
+ record.to(format)
172
+ }.flatten.sort.uniq.each { |line|
173
+ options[:output].puts line
174
+ }
175
+ else
176
+ records = parser.parse(options[:input]) { |record|
177
+ options[:output].puts record.to(format)
178
+ }
179
+ end
162
180
 
163
181
  Athena::Util.verbose(:count) do
164
182
  spit records.size
@@ -65,4 +65,8 @@ module Athena
65
65
  Formats.valid_format?(:out, format)
66
66
  end
67
67
 
68
+ def deferred_output?(format)
69
+ Formats[:out, format].deferred?
70
+ end
71
+
68
72
  end
@@ -35,7 +35,7 @@ module Athena
35
35
  class << self
36
36
 
37
37
  def formats
38
- Formats.instance_variable_get :@formats
38
+ Formats.instance_variable_get(:@formats)
39
39
  end
40
40
 
41
41
  def [](direction, format)
@@ -43,7 +43,11 @@ module Athena
43
43
  end
44
44
 
45
45
  def valid_format?(direction, format)
46
- formats[direction].has_key? format
46
+ formats[direction].has_key?(format)
47
+ end
48
+
49
+ def deferred?
50
+ false
47
51
  end
48
52
 
49
53
  def convert(*args)
@@ -53,6 +57,11 @@ module Athena
53
57
  private
54
58
 
55
59
  def register_format(direction, format)
60
+ if existing = formats[direction][format]
61
+ raise DuplicateFormatDefinitionError,
62
+ "format already defined (#{direction}): #{format} = #{existing}"
63
+ end
64
+
56
65
  formats[direction][format] = self
57
66
  end
58
67
 
@@ -68,6 +77,12 @@ module Athena
68
77
  raise NotImplementedError, 'must be defined by sub-class'
69
78
  end
70
79
 
80
+ class DuplicateFormatDefinitionError < StandardError
81
+ end
82
+
83
+ class FormatArgumentError < ArgumentError
84
+ end
85
+
71
86
  end
72
87
 
73
88
  end
@@ -40,20 +40,25 @@ module Athena
40
40
 
41
41
  ICONV_TO_LATIN1 = Iconv.new('latin1', 'utf-8')
42
42
 
43
+ VALUE_SEPARATOR = '|'
44
+ RECORD_SEPARATOR = '&&&'
45
+
43
46
  def self.convert(record)
44
47
  dbm = ["ID:#{record.id}"]
48
+
45
49
  record.struct.each { |field, struct|
46
50
  strings = struct[:elements].inject([]) { |array, element|
47
51
  values = (struct[:values][element] || []).map { |v|
48
52
  (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
49
53
  }.reject { |v| v.empty? }
50
54
 
51
- array << (values.empty? ? struct[:empty] : values.join('|'))
55
+ array << (values.empty? ? struct[:empty] : values.join(VALUE_SEPARATOR))
52
56
  }
53
57
 
54
58
  dbm << "#{field.to_s.upcase}:#{ICONV_TO_LATIN1.iconv(struct[:string] % strings)}"
55
59
  }
56
- dbm << '&&&'
60
+
61
+ dbm << RECORD_SEPARATOR
57
62
 
58
63
  dbm.join(CRLF) << CRLF << CRLF
59
64
  end
@@ -0,0 +1,145 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of athena, the database file converter. #
5
+ # #
6
+ # Copyright (C) 2007 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'iconv'
30
+ require 'enumerator'
31
+
32
+ module Athena
33
+
34
+ class Formats
35
+
36
+ module Lingo
37
+
38
+ class Base < Athena::Formats
39
+
40
+ class << self
41
+
42
+ def convert(record)
43
+ record.struct.inject([]) { |terms, (field, struct)|
44
+ terms << struct[:elements].inject([]) { |array, element|
45
+ array += (struct[:values][element] || []).map { |v|
46
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
47
+ }.reject { |v| v.empty? }
48
+ }
49
+ }
50
+ end
51
+
52
+ def deferred?
53
+ true
54
+ end
55
+
56
+ private
57
+
58
+ def check_number_of_arguments(expected, actual, blow = false, &block)
59
+ return true if block ? block[actual] : expected == actual
60
+
61
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
62
+
63
+ if blow
64
+ raise FormatArgumentError, msg
65
+ else
66
+ warn msg
67
+ return false
68
+ end
69
+ end
70
+
71
+ def check_number_of_arguments!(expected, actual, &block)
72
+ check_number_of_arguments(expected, actual, true, &block)
73
+ end
74
+
75
+ end
76
+
77
+ end
78
+
79
+ # "Nasenbär\n"
80
+ class SingleWord < Athena::Formats::Lingo::Base
81
+
82
+ register_formats :out, 'lingo/single_word'
83
+
84
+ def self.convert(record)
85
+ super.flatten
86
+ end
87
+
88
+ end
89
+
90
+ # "John Vorhauer*Vorhauer, John\n"
91
+ class KeyValue < Athena::Formats::Lingo::Base
92
+
93
+ register_formats :out, 'lingo/key_value'
94
+
95
+ def self.convert(record)
96
+ super.map { |terms|
97
+ next unless check_number_of_arguments(2, terms.size)
98
+
99
+ terms.join('*')
100
+ }.compact
101
+ end
102
+
103
+ end
104
+
105
+ # "Essen,essen #v Essen #s Esse #s\n"
106
+ class WordClass < Athena::Formats::Lingo::Base
107
+
108
+ register_formats :out, 'lingo/word_class'
109
+
110
+ def self.convert(record)
111
+ super.map { |terms|
112
+ next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
113
+ actual > 1 && actual % 2 == 1
114
+ }
115
+
116
+ [terms.shift, terms.to_enum(:each_slice, 2).map { |form, wc|
117
+ "#{form} ##{wc}"
118
+ }.join(' ')].join(',')
119
+ }.compact
120
+ end
121
+
122
+ end
123
+
124
+ # "Fax;Faxkopie;Telefax\n"
125
+ class MultiValue < Athena::Formats::Lingo::Base
126
+
127
+ register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
128
+
129
+ def self.convert(record)
130
+ super.map { |terms|
131
+ next unless check_number_of_arguments('> 1', terms.size) { |actual|
132
+ actual > 1
133
+ }
134
+
135
+ terms.join(';')
136
+ }.compact
137
+ end
138
+
139
+ end
140
+
141
+ end
142
+
143
+ end
144
+
145
+ end
@@ -187,12 +187,12 @@ module Athena
187
187
  private
188
188
 
189
189
  def level
190
- BaseSpec.instance_variable_get :@level
190
+ BaseSpec.instance_variable_get(:@level)
191
191
  end
192
192
 
193
193
  def step(direction)
194
194
  steps = { :down => 1, :up => -1 }
195
- BaseSpec.instance_variable_set :@level, level + steps[direction]
195
+ BaseSpec.instance_variable_set(:@level, level + steps[direction])
196
196
  end
197
197
 
198
198
  end
@@ -244,7 +244,7 @@ module Athena
244
244
  def text(context, data)
245
245
  super
246
246
 
247
- record.update name, data
247
+ record.update(name, data)
248
248
  end
249
249
 
250
250
  end
@@ -260,7 +260,7 @@ module Athena
260
260
  super()
261
261
 
262
262
  @parent = parent
263
- default! self
263
+ default!(self)
264
264
  end
265
265
 
266
266
  end
@@ -32,6 +32,9 @@ module Athena
32
32
 
33
33
  include Util
34
34
 
35
+ DEFAULT_SEPARATOR = ', '
36
+ DEFAULT_EMPTY = '<<EMPTY>>'
37
+
35
38
  attr_reader :config, :spec
36
39
  attr_accessor :block
37
40
 
@@ -61,12 +64,13 @@ module Athena
61
64
  when Hash
62
65
  elements = v[:elements] || v[:element].to_a
63
66
 
64
- raise ArgumentError, "no elements specified for field #{field}" unless elements.is_a?(Array)
67
+ raise ArgumentError, "no elements specified for field #{field}" \
68
+ unless elements.is_a?(Array)
65
69
  else
66
70
  raise ArgumentError, "illegal value for field #{field}"
67
71
  end
68
72
 
69
- separator = v[:separator] || ', '
73
+ separator = v[:separator] || DEFAULT_SEPARATOR
70
74
 
71
75
  elements.each { |element|
72
76
  verbose(:config) do
@@ -75,7 +79,7 @@ module Athena
75
79
 
76
80
  (hash[element] ||= {})[field] = {
77
81
  :string => v[:string] || ['%s'] * elements.size * separator,
78
- :empty => v[:empty] || '<<EMPTY>>',
82
+ :empty => v[:empty] || DEFAULT_EMPTY,
79
83
  :elements => elements
80
84
  }
81
85
  }
@@ -53,11 +53,11 @@ module Athena
53
53
  attr_reader :struct, :block, :id
54
54
 
55
55
  def initialize(block, id = object_id.abs)
56
- self.class.records << self
57
-
58
56
  @struct = {}
59
57
  @block = block
60
58
  @id = id
59
+
60
+ add_record
61
61
  end
62
62
 
63
63
  def fill(field, config)
@@ -89,6 +89,12 @@ module Athena
89
89
  Athena::Formats[:out, format].convert(self)
90
90
  end
91
91
 
92
+ private
93
+
94
+ def add_record
95
+ self.class.records << self
96
+ end
97
+
92
98
  class NoRecordError < StandardError
93
99
  end
94
100
 
@@ -33,7 +33,7 @@ module Athena
33
33
  extend self
34
34
 
35
35
  def verbose(what, klass = self.class, &block)
36
- if $_VERBOSE[what]
36
+ if $Verbose[what]
37
37
  klass.send(:define_method, :spit) { |msg|
38
38
  warn "*#{what}: #{msg}"
39
39
  }
@@ -32,7 +32,7 @@ module Athena
32
32
 
33
33
  MAJOR = 0
34
34
  MINOR = 0
35
- TINY = 1
35
+ TINY = 2
36
36
 
37
37
  class << self
38
38
 
metadata CHANGED
@@ -1,43 +1,56 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
4
2
  name: athena
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.0.1.53
7
- date: 2007-10-11 00:00:00 +02:00
8
- summary: Convert database files to various formats
9
- require_paths:
10
- - lib
11
- email: jens.wille@uni-koeln.de
12
- homepage:
13
- rubyforge_project:
14
- description:
15
- autorequire:
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.0.2.56
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Jens Wille
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-01-08 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: xmlstreamin
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: ruby-nuggets
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0"
32
+ version:
33
+ description: Convert database files to various formats.
34
+ email: jens.wille@uni-koeln.de
35
+ executables:
36
+ - athena
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - COPYING
42
+ - ChangeLog
31
43
  files:
32
- - lib/athena.rb
33
44
  - lib/athena/formats.rb
34
45
  - lib/athena/version.rb
35
46
  - lib/athena/util.rb
36
- - lib/athena/record.rb
37
- - lib/athena/parser.rb
38
47
  - lib/athena/formats/sisis.rb
39
48
  - lib/athena/formats/xml.rb
40
49
  - lib/athena/formats/dbm.rb
50
+ - lib/athena/formats/lingo.rb
51
+ - lib/athena/record.rb
52
+ - lib/athena/parser.rb
53
+ - lib/athena.rb
41
54
  - bin/athena
42
55
  - COPYING
43
56
  - README
@@ -46,36 +59,39 @@ files:
46
59
  - example/sisis-ex.txt
47
60
  - example/config.yaml
48
61
  - example/example.xml
49
- test_files: []
50
-
51
- rdoc_options: []
52
-
53
- extra_rdoc_files:
62
+ has_rdoc: true
63
+ homepage: http://prometheus.rubyforge.org/athena
64
+ post_install_message:
65
+ rdoc_options:
66
+ - --all
67
+ - --inline-source
68
+ - --charset
69
+ - UTF-8
70
+ - --main
54
71
  - README
55
- - COPYING
56
- - ChangeLog
57
- executables:
58
- - athena
59
- extensions: []
60
-
72
+ - --title
73
+ - athena Application documentation
74
+ - --line-numbers
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
82
+ version:
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: "0"
88
+ version:
61
89
  requirements: []
62
90
 
63
- dependencies:
64
- - !ruby/object:Gem::Dependency
65
- name: xmlstreamin
66
- version_requirement:
67
- version_requirements: !ruby/object:Gem::Version::Requirement
68
- requirements:
69
- - - ">"
70
- - !ruby/object:Gem::Version
71
- version: 0.0.0
72
- version:
73
- - !ruby/object:Gem::Dependency
74
- name: ruby-nuggets
75
- version_requirement:
76
- version_requirements: !ruby/object:Gem::Version::Requirement
77
- requirements:
78
- - - ">"
79
- - !ruby/object:Gem::Version
80
- version: 0.0.0
81
- version:
91
+ rubyforge_project: prometheus
92
+ rubygems_version: 1.0.1
93
+ signing_key:
94
+ specification_version: 2
95
+ summary: Convert database files to various formats.
96
+ test_files: []
97
+