athena 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.9
5
+ This documentation refers to athena version 0.1.0
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/bin/athena CHANGED
@@ -77,12 +77,10 @@ OptionParser.new { |opts|
77
77
  }
78
78
 
79
79
  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
80
- puts "Available input formats (specs):"
80
+ puts 'Available input formats (specs):'
81
81
 
82
- formats = Athena.input_formats
83
- max = formats.map { |a, _| a.length }.max
84
- formats.each { |f, k|
85
- puts " - %-#{max}s = %s" % [f, k]
82
+ Athena.input_formats.each { |f, k|
83
+ puts " - #{f}#{" (= #{k})" if f != k.to_s}"
86
84
  }
87
85
 
88
86
  exit 0
@@ -101,12 +99,10 @@ OptionParser.new { |opts|
101
99
  }
102
100
 
103
101
  opts.on('-l', '--list-formats', "List available output formats and exit") {
104
- puts "Available output formats:"
102
+ puts 'Available output formats:'
105
103
 
106
- formats = Athena.output_formats
107
- max = formats.map { |a, _| a.length }.max
108
- formats.each { |f, k|
109
- puts " - %-#{max}s = %s" % [f, k]
104
+ Athena.output_formats.each { |f, k|
105
+ puts " - #{f}#{" (= #{k})" if f != k.to_s}"
110
106
  }
111
107
 
112
108
  exit 0
@@ -173,8 +169,10 @@ if Athena.deferred_output?(format)
173
169
  options[:output].puts line
174
170
  }
175
171
  else
176
- res = parser.parse(options[:input]) { |record|
177
- options[:output].puts record.to(format)
172
+ res = Athena.with_format(format) { |_format|
173
+ parser.parse(options[:input]) { |record|
174
+ options[:output].puts record.to(_format)
175
+ }
178
176
  }
179
177
  end
180
178
 
@@ -30,9 +30,9 @@
30
30
  # output formats. It's accompanied by a corresponding script that gives access
31
31
  # to all its converting features.
32
32
  #
33
- # In order to support additional input and/or output formats, Athena::Formats
34
- # needs to be sub-classed and, respectively, an instance method _parse_ or a
35
- # class method _convert_ supplied. This way, a specific format can even function
33
+ # In order to support additional input and/or output formats, Athena::Formats::Base
34
+ # needs to be sub-classed and, respectively, an instance method _parse_ or an
35
+ # instance method _convert_ supplied. This way, a specific format can even function
36
36
  # as both input and output format.
37
37
 
38
38
  module Athena
@@ -53,23 +53,27 @@ module Athena
53
53
  end
54
54
 
55
55
  def input_formats
56
- Formats.formats[:in].sort
56
+ Formats::Base.formats[:in].sort
57
57
  end
58
58
 
59
59
  def valid_input_format?(format)
60
- Formats.valid_format?(:in, format)
60
+ Formats::Base.valid_format?(:in, format)
61
61
  end
62
62
 
63
63
  def output_formats
64
- Formats.formats[:out].sort
64
+ Formats::Base.formats[:out].sort
65
65
  end
66
66
 
67
67
  def valid_output_format?(format)
68
- Formats.valid_format?(:out, format)
68
+ Formats::Base.valid_format?(:out, format)
69
69
  end
70
70
 
71
71
  def deferred_output?(format)
72
72
  Formats[:out, format].deferred?
73
73
  end
74
74
 
75
+ def with_format(format, &block)
76
+ Formats[:out, format].wrap(&block)
77
+ end
78
+
75
79
  end
@@ -3,7 +3,7 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007-2008 University of Cologne, #
6
+ # Copyright (C) 2007-2009 University of Cologne, #
7
7
  # Albertus-Magnus-Platz, #
8
8
  # 50932 Cologne, Germany #
9
9
  # #
@@ -26,56 +26,99 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- class Athena::Formats
29
+ module Athena::Formats
30
+
31
+ def self.[](direction, format)
32
+ if direction == :out
33
+ if format.class < Base
34
+ if format.class.direction != direction
35
+ raise DirectionMismatchError,
36
+ "expected #{direction}, got #{format.class.direction}"
37
+ else
38
+ format
39
+ end
40
+ else
41
+ Base.formats[direction][format].new
42
+ end
43
+ else
44
+ Base.formats[direction][format]
45
+ end
46
+ end
30
47
 
31
- @formats = { :in => {}, :out => {} }
48
+ class Base
32
49
 
33
- class << self
50
+ @formats = { :in => {}, :out => {} }
34
51
 
35
- def formats
36
- Athena::Formats.instance_variable_get(:@formats)
37
- end
52
+ class << self
38
53
 
39
- def [](direction, format)
40
- formats[direction][format]
41
- end
54
+ def formats
55
+ Base.instance_variable_get(:@formats)
56
+ end
42
57
 
43
- def valid_format?(direction, format)
44
- formats[direction].has_key?(format)
45
- end
58
+ def valid_format?(direction, format)
59
+ if format.class < Base
60
+ direction == format.class.direction
61
+ else
62
+ formats[direction].has_key?(format)
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def register_format(direction, *aliases, &block)
69
+ format = name.split('::').last.
70
+ gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
71
+ gsub(/([a-z\d])([A-Z])/, '\1_\2').
72
+ downcase
73
+
74
+ register_format!(direction, format, *aliases, &block)
75
+ end
76
+
77
+ def register_format!(direction, format, *aliases, &block)
78
+ raise "must be a sub-class of #{Base}" unless self < Base
79
+
80
+ klass = Class.new(self, &block)
81
+
82
+ klass.instance_eval %Q{
83
+ def direction; #{direction.inspect}; end
84
+ def name; '#{format}::#{direction}'; end
85
+ def to_s; '#{format}'; end
86
+ }
87
+
88
+ [format, *aliases].each { |name|
89
+ if existing = formats[direction][name]
90
+ raise DuplicateFormatDefinitionError,
91
+ "format already defined (#{direction}): #{name}"
92
+ else
93
+ formats[direction][name] = klass
94
+ end
95
+ }
96
+ end
46
97
 
47
- def deferred?
48
- false
49
98
  end
50
99
 
51
- def convert(*args)
100
+ def parse(*args)
52
101
  raise NotImplementedError, 'must be defined by sub-class'
53
102
  end
54
103
 
55
- private
56
-
57
- def register_format(direction, format)
58
- if existing = formats[direction][format]
59
- raise DuplicateFormatDefinitionError,
60
- "format already defined (#{direction}): #{format} = #{existing}"
61
- end
104
+ def convert(record)
105
+ raise NotImplementedError, 'must be defined by sub-class'
106
+ end
62
107
 
63
- formats[direction][format] = self
108
+ def wrap
109
+ yield self
64
110
  end
65
111
 
66
- def register_formats(direction, *formats)
67
- formats.each { |format|
68
- register_format(direction, format)
69
- }
112
+ def deferred?
113
+ false
70
114
  end
71
115
 
72
116
  end
73
117
 
74
- def parse(*args)
75
- raise NotImplementedError, 'must be defined by sub-class'
118
+ class DuplicateFormatDefinitionError < StandardError
76
119
  end
77
120
 
78
- class DuplicateFormatDefinitionError < StandardError
121
+ class DirectionMismatchError < ArgumentError
79
122
  end
80
123
 
81
124
  class FormatArgumentError < ArgumentError
@@ -83,6 +126,4 @@ class Athena::Formats
83
126
 
84
127
  end
85
128
 
86
- Dir[__FILE__.sub(/\.rb$/, '/**/*.rb')].each { |rb|
87
- require rb
88
- }
129
+ Dir[__FILE__.sub(/\.rb\z/, '/**/*.rb')].sort.each { |rb| require rb }
@@ -28,11 +28,9 @@
28
28
 
29
29
  require 'iconv'
30
30
 
31
- class Athena::Formats
31
+ module Athena::Formats
32
32
 
33
- class DBM < Athena::Formats
34
-
35
- register_formats :out, 'dbm', 'midos'
33
+ class DBM < Base
36
34
 
37
35
  CRLF = "\015\012"
38
36
 
@@ -41,7 +39,9 @@ class Athena::Formats
41
39
  VALUE_SEPARATOR = '|'
42
40
  RECORD_SEPARATOR = '&&&'
43
41
 
44
- def self.convert(record)
42
+ register_format :out, 'midos'
43
+
44
+ def convert(record)
45
45
  dbm = ["ID:#{record.id}"]
46
46
 
47
47
  record.struct.each { |field, struct|
@@ -31,31 +31,33 @@ require 'rubygems'
31
31
  gem 'ferret', ENV['FERRET_VERSION'] if ENV['FERRET_VERSION']
32
32
  require 'ferret'
33
33
 
34
- class Athena::Formats
34
+ module Athena::Formats
35
35
 
36
- class Ferret < Athena::Formats
36
+ class Ferret < Base
37
37
 
38
- register_format :in, 'ferret'
38
+ register_format :in do
39
39
 
40
- attr_reader :record_element, :config, :parser, :match_all_query
40
+ attr_reader :record_element, :config, :parser, :match_all_query
41
41
 
42
- def initialize(parser)
43
- config = parser.config.dup
42
+ def initialize(parser)
43
+ config = parser.config.dup
44
44
 
45
- case @record_element = config.delete(:__record_element)
46
- when String
47
- # fine!
48
- when nil
49
- raise NoRecordElementError, 'no record element specified'
50
- else
51
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
45
+ case @record_element = config.delete(:__record_element)
46
+ when String
47
+ # fine!
48
+ when nil
49
+ raise NoRecordElementError, 'no record element specified'
50
+ else
51
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
52
+ end
53
+
54
+ @config = config
55
+ @parser = parser
52
56
  end
53
57
 
54
- @config = config
55
- @parser = parser
56
58
  end
57
59
 
58
- def parse(source)
60
+ def parse(source, &block)
59
61
  path = source.path
60
62
 
61
63
  # make sure the index can be opened
@@ -82,7 +84,7 @@ class Athena::Formats
82
84
  unless index.deleted?(i)
83
85
  doc = index[i]
84
86
 
85
- Athena::Record.new(parser.block, doc[record_element]) { |record|
87
+ Athena::Record.new(doc[record_element], block) { |record|
86
88
  config.each { |element, field_config|
87
89
  record.update(element, doc[element], field_config)
88
90
  }
@@ -29,68 +29,56 @@
29
29
  require 'iconv'
30
30
  require 'enumerator'
31
31
 
32
- class Athena::Formats
33
-
34
- module Lingo
35
-
36
- class Base < Athena::Formats
37
-
38
- class << self
39
-
40
- def convert(record)
41
- record.struct.inject([]) { |terms, (field, struct)|
42
- terms << struct[:elements].inject([]) { |array, element|
43
- array += (struct[:values][element] || []).map { |v|
44
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
45
- }.reject { |v| v.empty? }
46
- }
47
- }
48
- end
49
-
50
- def deferred?
51
- true
52
- end
53
-
54
- private
32
+ module Athena::Formats
33
+
34
+ class Lingo < Base
35
+
36
+ def convert(record)
37
+ record.struct.inject([]) { |terms, (field, struct)|
38
+ terms << struct[:elements].inject([]) { |array, element|
39
+ array += (struct[:values][element] || []).map { |v|
40
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
41
+ }.reject { |v| v.empty? }
42
+ }
43
+ }
44
+ end
55
45
 
56
- def check_number_of_arguments(expected, actual, blow = false, &block)
57
- return true if block ? block[actual] : expected == actual
46
+ def deferred?
47
+ true
48
+ end
58
49
 
59
- msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
50
+ private
60
51
 
61
- if blow
62
- raise FormatArgumentError, msg
63
- else
64
- warn msg
65
- return false
66
- end
67
- end
52
+ def check_number_of_arguments(expected, actual, blow = false, &block)
53
+ return true if block ? block[actual] : expected == actual
68
54
 
69
- def check_number_of_arguments!(expected, actual, &block)
70
- check_number_of_arguments(expected, actual, true, &block)
71
- end
55
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
72
56
 
57
+ if blow
58
+ raise FormatArgumentError, msg
59
+ else
60
+ warn msg
61
+ return false
73
62
  end
63
+ end
74
64
 
65
+ def check_number_of_arguments!(expected, actual, &block)
66
+ check_number_of_arguments(expected, actual, true, &block)
75
67
  end
76
68
 
77
69
  # "Nasenbär\n"
78
- class SingleWord < Athena::Formats::Lingo::Base
79
-
80
- register_formats :out, 'lingo/single_word'
70
+ register_format! :out, 'lingo/single_word' do
81
71
 
82
- def self.convert(record)
72
+ def convert(record)
83
73
  super.flatten
84
74
  end
85
75
 
86
76
  end
87
77
 
88
78
  # "John Vorhauer*Vorhauer, John\n"
89
- class KeyValue < Athena::Formats::Lingo::Base
79
+ register_format! :out, 'lingo/key_value' do
90
80
 
91
- register_formats :out, 'lingo/key_value'
92
-
93
- def self.convert(record)
81
+ def convert(record)
94
82
  super.map { |terms|
95
83
  next unless check_number_of_arguments(2, terms.size)
96
84
 
@@ -101,11 +89,9 @@ class Athena::Formats
101
89
  end
102
90
 
103
91
  # "Essen,essen #v Essen #s Esse #s\n"
104
- class WordClass < Athena::Formats::Lingo::Base
105
-
106
- register_formats :out, 'lingo/word_class'
92
+ register_format! :out, 'lingo/word_class' do
107
93
 
108
- def self.convert(record)
94
+ def convert(record)
109
95
  super.map { |terms|
110
96
  next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
111
97
  actual > 1 && actual % 2 == 1
@@ -120,11 +106,9 @@ class Athena::Formats
120
106
  end
121
107
 
122
108
  # "Fax;Faxkopie;Telefax\n"
123
- class MultiValue < Athena::Formats::Lingo::Base
124
-
125
- register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
109
+ register_format! :out, 'lingo/multi_value', 'lingo/multi_key' do
126
110
 
127
- def self.convert(record)
111
+ def convert(record)
128
112
  super.map { |terms|
129
113
  next unless check_number_of_arguments('> 1', terms.size) { |actual|
130
114
  actual > 1
@@ -26,31 +26,33 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- class Athena::Formats
29
+ module Athena::Formats
30
30
 
31
- class Sisis < Athena::Formats
31
+ class Sisis < Base
32
32
 
33
- register_format :in, 'sisis'
33
+ register_format :in do
34
34
 
35
- attr_reader :record_element, :config, :parser
35
+ attr_reader :record_element, :config, :parser
36
36
 
37
- def initialize(parser)
38
- config = parser.config.dup
37
+ def initialize(parser)
38
+ config = parser.config.dup
39
39
 
40
- case @record_element = config.delete(:__record_element)
41
- when String
42
- # fine!
43
- when nil
44
- raise NoRecordElementError, 'no record element specified'
45
- else
46
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
40
+ case @record_element = config.delete(:__record_element)
41
+ when String
42
+ # fine!
43
+ when nil
44
+ raise NoRecordElementError, 'no record element specified'
45
+ else
46
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
47
+ end
48
+
49
+ @config = config
50
+ @parser = parser
47
51
  end
48
52
 
49
- @config = config
50
- @parser = parser
51
53
  end
52
54
 
53
- def parse(source)
55
+ def parse(source, &block)
54
56
  record, num = nil, 0
55
57
 
56
58
  source.each { |line|
@@ -59,7 +61,7 @@ class Athena::Formats
59
61
  case element
60
62
  when record_element
61
63
  record.close if record
62
- record = Athena::Record.new(parser.block, value)
64
+ record = Athena::Record.new(value, block)
63
65
  num += 1
64
66
  else
65
67
  record.update(element, value, config[element])
@@ -33,40 +33,47 @@ require 'rubygems'
33
33
  require 'xmlstreamin'
34
34
  require 'nuggets/hash/insert'
35
35
 
36
- class Athena::Formats
36
+ module Athena::Formats
37
37
 
38
- class XML < Athena::Formats
38
+ class XML < Base
39
39
 
40
40
  include Athena::Util
41
41
 
42
- register_format :in, 'xml'
42
+ register_format :in do
43
43
 
44
- attr_reader :spec, :listener
44
+ attr_reader :specs, :record_element
45
+
46
+ def initialize(parser)
47
+ @specs = setup_specs(parser.config.dup)
48
+ end
45
49
 
46
- def initialize(parser)
47
- @spec = build_spec(parser)
48
- @listener = XMLStreamin::XMLStreamListener.new(@spec)
49
50
  end
50
51
 
51
- def parse(source)
52
- REXML::Document.parse_stream(source, listener)
52
+ def parse(source, &block)
53
+ REXML::Document.parse_stream(source, listener(&block))
53
54
  end
54
55
 
55
- private
56
+ =begin
57
+ register_format :out
58
+
59
+ def convert(record)
60
+ # ...
61
+ end
62
+ =end
56
63
 
57
- def build_spec(parser)
58
- config = parser.config.dup
64
+ private
59
65
 
60
- case record_element = config.delete(:__record_element)
66
+ def setup_specs(config)
67
+ case @record_element = config.delete(:__record_element)
61
68
  when String
62
69
  # fine!
63
70
  when nil
64
71
  raise NoRecordElementError, 'no record element specified'
65
72
  else
66
- raise IllegalRecordElementError, "illegal record element #{record_element.inspect}"
73
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
67
74
  end
68
75
 
69
- element_specs = config.inject({}) { |specs, (element, element_spec)|
76
+ config.inject({}) { |specs, (element, element_spec)|
70
77
  element_spec.each { |field, c|
71
78
  element.split('/').reverse.inject({}) { |hash, part|
72
79
  s = define_spec(element, field, c, hash.empty? ? :default : hash)
@@ -78,9 +85,11 @@ class Athena::Formats
78
85
 
79
86
  specs
80
87
  }
88
+ end
81
89
 
82
- record_spec = RecordSpec.new(parser)
83
- record_spec.specs!(element_specs)
90
+ def listener(&block)
91
+ record_spec = RecordSpec.new(&block)
92
+ record_spec.specs!(specs)
84
93
 
85
94
  root_spec = BaseSpec.new
86
95
  root_spec.specs!(record_element => record_spec)
@@ -92,7 +101,7 @@ class Athena::Formats
92
101
  spec.inspect_spec
93
102
  end
94
103
 
95
- spec
104
+ XMLStreamin::XMLStreamListener.new(spec)
96
105
  end
97
106
 
98
107
  def define_spec(element, field, config, arg)
@@ -197,19 +206,19 @@ class Athena::Formats
197
206
 
198
207
  class RecordSpec < BaseSpec
199
208
 
200
- attr_reader :parser
209
+ attr_reader :block
201
210
  attr_accessor :record
202
211
 
203
- def initialize(parser)
212
+ def initialize(&block)
204
213
  super()
205
214
 
206
- @parser = parser
215
+ @block = block
207
216
  end
208
217
 
209
218
  def start(context, name, attrs)
210
219
  super
211
220
 
212
- self.record = Athena::Record.new(parser.block, nil, true)
221
+ self.record = Athena::Record.new(nil, block, true)
213
222
  end
214
223
 
215
224
  def done(context, name)
@@ -33,8 +33,7 @@ class Athena::Parser
33
33
  DEFAULT_SEPARATOR = ', '
34
34
  DEFAULT_EMPTY = '<<EMPTY>>'
35
35
 
36
- attr_reader :config, :spec
37
- attr_accessor :block
36
+ attr_reader :config, :spec
38
37
 
39
38
  def initialize(config, spec)
40
39
  @config = build_config(config)
@@ -42,9 +41,7 @@ class Athena::Parser
42
41
  end
43
42
 
44
43
  def parse(source, &block)
45
- self.block = block
46
-
47
- res = spec.parse(source)
44
+ res = spec.parse(source, &block)
48
45
  res.is_a?(Numeric) ? res : Athena::Record.records
49
46
  end
50
47
 
@@ -50,10 +50,10 @@ class Athena::Record
50
50
 
51
51
  attr_reader :struct, :block, :id
52
52
 
53
- def initialize(block, id = nil, _add_record = !block)
54
- @struct = {}
55
- @block = block
53
+ def initialize(id = nil, block = nil, _add_record = !block)
56
54
  @id = id || object_id.abs
55
+ @block = block
56
+ @struct = {}
57
57
 
58
58
  add_record if _add_record
59
59
 
@@ -29,8 +29,8 @@
29
29
  module Athena::Version
30
30
 
31
31
  MAJOR = 0
32
- MINOR = 0
33
- TINY = 9
32
+ MINOR = 1
33
+ TINY = 0
34
34
 
35
35
  class << self
36
36
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: athena
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-21 00:00:00 +02:00
12
+ date: 2009-08-24 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -68,15 +68,15 @@ licenses: []
68
68
 
69
69
  post_install_message:
70
70
  rdoc_options:
71
+ - --line-numbers
71
72
  - --main
72
73
  - README
73
- - --line-numbers
74
74
  - --inline-source
75
75
  - --title
76
76
  - athena Application documentation
77
- - --all
78
77
  - --charset
79
78
  - UTF-8
79
+ - --all
80
80
  require_paths:
81
81
  - lib
82
82
  required_ruby_version: !ruby/object:Gem::Requirement