athena 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to athena version 0.0.9
5
+ This documentation refers to athena version 0.1.0
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/bin/athena CHANGED
@@ -77,12 +77,10 @@ OptionParser.new { |opts|
77
77
  }
78
78
 
79
79
  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
80
- puts "Available input formats (specs):"
80
+ puts 'Available input formats (specs):'
81
81
 
82
- formats = Athena.input_formats
83
- max = formats.map { |a, _| a.length }.max
84
- formats.each { |f, k|
85
- puts " - %-#{max}s = %s" % [f, k]
82
+ Athena.input_formats.each { |f, k|
83
+ puts " - #{f}#{" (= #{k})" if f != k.to_s}"
86
84
  }
87
85
 
88
86
  exit 0
@@ -101,12 +99,10 @@ OptionParser.new { |opts|
101
99
  }
102
100
 
103
101
  opts.on('-l', '--list-formats', "List available output formats and exit") {
104
- puts "Available output formats:"
102
+ puts 'Available output formats:'
105
103
 
106
- formats = Athena.output_formats
107
- max = formats.map { |a, _| a.length }.max
108
- formats.each { |f, k|
109
- puts " - %-#{max}s = %s" % [f, k]
104
+ Athena.output_formats.each { |f, k|
105
+ puts " - #{f}#{" (= #{k})" if f != k.to_s}"
110
106
  }
111
107
 
112
108
  exit 0
@@ -173,8 +169,10 @@ if Athena.deferred_output?(format)
173
169
  options[:output].puts line
174
170
  }
175
171
  else
176
- res = parser.parse(options[:input]) { |record|
177
- options[:output].puts record.to(format)
172
+ res = Athena.with_format(format) { |_format|
173
+ parser.parse(options[:input]) { |record|
174
+ options[:output].puts record.to(_format)
175
+ }
178
176
  }
179
177
  end
180
178
 
@@ -30,9 +30,9 @@
30
30
  # output formats. It's accompanied by a corresponding script that gives access
31
31
  # to all its converting features.
32
32
  #
33
- # In order to support additional input and/or output formats, Athena::Formats
34
- # needs to be sub-classed and, respectively, an instance method _parse_ or a
35
- # class method _convert_ supplied. This way, a specific format can even function
33
+ # In order to support additional input and/or output formats, Athena::Formats::Base
34
+ # needs to be sub-classed and, respectively, an instance method _parse_ or an
35
+ # instance method _convert_ supplied. This way, a specific format can even function
36
36
  # as both input and output format.
37
37
 
38
38
  module Athena
@@ -53,23 +53,27 @@ module Athena
53
53
  end
54
54
 
55
55
  def input_formats
56
- Formats.formats[:in].sort
56
+ Formats::Base.formats[:in].sort
57
57
  end
58
58
 
59
59
  def valid_input_format?(format)
60
- Formats.valid_format?(:in, format)
60
+ Formats::Base.valid_format?(:in, format)
61
61
  end
62
62
 
63
63
  def output_formats
64
- Formats.formats[:out].sort
64
+ Formats::Base.formats[:out].sort
65
65
  end
66
66
 
67
67
  def valid_output_format?(format)
68
- Formats.valid_format?(:out, format)
68
+ Formats::Base.valid_format?(:out, format)
69
69
  end
70
70
 
71
71
  def deferred_output?(format)
72
72
  Formats[:out, format].deferred?
73
73
  end
74
74
 
75
+ def with_format(format, &block)
76
+ Formats[:out, format].wrap(&block)
77
+ end
78
+
75
79
  end
@@ -3,7 +3,7 @@
3
3
  # #
4
4
  # A component of athena, the database file converter. #
5
5
  # #
6
- # Copyright (C) 2007-2008 University of Cologne, #
6
+ # Copyright (C) 2007-2009 University of Cologne, #
7
7
  # Albertus-Magnus-Platz, #
8
8
  # 50932 Cologne, Germany #
9
9
  # #
@@ -26,56 +26,99 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- class Athena::Formats
29
+ module Athena::Formats
30
+
31
+ def self.[](direction, format)
32
+ if direction == :out
33
+ if format.class < Base
34
+ if format.class.direction != direction
35
+ raise DirectionMismatchError,
36
+ "expected #{direction}, got #{format.class.direction}"
37
+ else
38
+ format
39
+ end
40
+ else
41
+ Base.formats[direction][format].new
42
+ end
43
+ else
44
+ Base.formats[direction][format]
45
+ end
46
+ end
30
47
 
31
- @formats = { :in => {}, :out => {} }
48
+ class Base
32
49
 
33
- class << self
50
+ @formats = { :in => {}, :out => {} }
34
51
 
35
- def formats
36
- Athena::Formats.instance_variable_get(:@formats)
37
- end
52
+ class << self
38
53
 
39
- def [](direction, format)
40
- formats[direction][format]
41
- end
54
+ def formats
55
+ Base.instance_variable_get(:@formats)
56
+ end
42
57
 
43
- def valid_format?(direction, format)
44
- formats[direction].has_key?(format)
45
- end
58
+ def valid_format?(direction, format)
59
+ if format.class < Base
60
+ direction == format.class.direction
61
+ else
62
+ formats[direction].has_key?(format)
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def register_format(direction, *aliases, &block)
69
+ format = name.split('::').last.
70
+ gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
71
+ gsub(/([a-z\d])([A-Z])/, '\1_\2').
72
+ downcase
73
+
74
+ register_format!(direction, format, *aliases, &block)
75
+ end
76
+
77
+ def register_format!(direction, format, *aliases, &block)
78
+ raise "must be a sub-class of #{Base}" unless self < Base
79
+
80
+ klass = Class.new(self, &block)
81
+
82
+ klass.instance_eval %Q{
83
+ def direction; #{direction.inspect}; end
84
+ def name; '#{format}::#{direction}'; end
85
+ def to_s; '#{format}'; end
86
+ }
87
+
88
+ [format, *aliases].each { |name|
89
+ if existing = formats[direction][name]
90
+ raise DuplicateFormatDefinitionError,
91
+ "format already defined (#{direction}): #{name}"
92
+ else
93
+ formats[direction][name] = klass
94
+ end
95
+ }
96
+ end
46
97
 
47
- def deferred?
48
- false
49
98
  end
50
99
 
51
- def convert(*args)
100
+ def parse(*args)
52
101
  raise NotImplementedError, 'must be defined by sub-class'
53
102
  end
54
103
 
55
- private
56
-
57
- def register_format(direction, format)
58
- if existing = formats[direction][format]
59
- raise DuplicateFormatDefinitionError,
60
- "format already defined (#{direction}): #{format} = #{existing}"
61
- end
104
+ def convert(record)
105
+ raise NotImplementedError, 'must be defined by sub-class'
106
+ end
62
107
 
63
- formats[direction][format] = self
108
+ def wrap
109
+ yield self
64
110
  end
65
111
 
66
- def register_formats(direction, *formats)
67
- formats.each { |format|
68
- register_format(direction, format)
69
- }
112
+ def deferred?
113
+ false
70
114
  end
71
115
 
72
116
  end
73
117
 
74
- def parse(*args)
75
- raise NotImplementedError, 'must be defined by sub-class'
118
+ class DuplicateFormatDefinitionError < StandardError
76
119
  end
77
120
 
78
- class DuplicateFormatDefinitionError < StandardError
121
+ class DirectionMismatchError < ArgumentError
79
122
  end
80
123
 
81
124
  class FormatArgumentError < ArgumentError
@@ -83,6 +126,4 @@ class Athena::Formats
83
126
 
84
127
  end
85
128
 
86
- Dir[__FILE__.sub(/\.rb$/, '/**/*.rb')].each { |rb|
87
- require rb
88
- }
129
+ Dir[__FILE__.sub(/\.rb\z/, '/**/*.rb')].sort.each { |rb| require rb }
@@ -28,11 +28,9 @@
28
28
 
29
29
  require 'iconv'
30
30
 
31
- class Athena::Formats
31
+ module Athena::Formats
32
32
 
33
- class DBM < Athena::Formats
34
-
35
- register_formats :out, 'dbm', 'midos'
33
+ class DBM < Base
36
34
 
37
35
  CRLF = "\015\012"
38
36
 
@@ -41,7 +39,9 @@ class Athena::Formats
41
39
  VALUE_SEPARATOR = '|'
42
40
  RECORD_SEPARATOR = '&&&'
43
41
 
44
- def self.convert(record)
42
+ register_format :out, 'midos'
43
+
44
+ def convert(record)
45
45
  dbm = ["ID:#{record.id}"]
46
46
 
47
47
  record.struct.each { |field, struct|
@@ -31,31 +31,33 @@ require 'rubygems'
31
31
  gem 'ferret', ENV['FERRET_VERSION'] if ENV['FERRET_VERSION']
32
32
  require 'ferret'
33
33
 
34
- class Athena::Formats
34
+ module Athena::Formats
35
35
 
36
- class Ferret < Athena::Formats
36
+ class Ferret < Base
37
37
 
38
- register_format :in, 'ferret'
38
+ register_format :in do
39
39
 
40
- attr_reader :record_element, :config, :parser, :match_all_query
40
+ attr_reader :record_element, :config, :parser, :match_all_query
41
41
 
42
- def initialize(parser)
43
- config = parser.config.dup
42
+ def initialize(parser)
43
+ config = parser.config.dup
44
44
 
45
- case @record_element = config.delete(:__record_element)
46
- when String
47
- # fine!
48
- when nil
49
- raise NoRecordElementError, 'no record element specified'
50
- else
51
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
45
+ case @record_element = config.delete(:__record_element)
46
+ when String
47
+ # fine!
48
+ when nil
49
+ raise NoRecordElementError, 'no record element specified'
50
+ else
51
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
52
+ end
53
+
54
+ @config = config
55
+ @parser = parser
52
56
  end
53
57
 
54
- @config = config
55
- @parser = parser
56
58
  end
57
59
 
58
- def parse(source)
60
+ def parse(source, &block)
59
61
  path = source.path
60
62
 
61
63
  # make sure the index can be opened
@@ -82,7 +84,7 @@ class Athena::Formats
82
84
  unless index.deleted?(i)
83
85
  doc = index[i]
84
86
 
85
- Athena::Record.new(parser.block, doc[record_element]) { |record|
87
+ Athena::Record.new(doc[record_element], block) { |record|
86
88
  config.each { |element, field_config|
87
89
  record.update(element, doc[element], field_config)
88
90
  }
@@ -29,68 +29,56 @@
29
29
  require 'iconv'
30
30
  require 'enumerator'
31
31
 
32
- class Athena::Formats
33
-
34
- module Lingo
35
-
36
- class Base < Athena::Formats
37
-
38
- class << self
39
-
40
- def convert(record)
41
- record.struct.inject([]) { |terms, (field, struct)|
42
- terms << struct[:elements].inject([]) { |array, element|
43
- array += (struct[:values][element] || []).map { |v|
44
- (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
45
- }.reject { |v| v.empty? }
46
- }
47
- }
48
- end
49
-
50
- def deferred?
51
- true
52
- end
53
-
54
- private
32
+ module Athena::Formats
33
+
34
+ class Lingo < Base
35
+
36
+ def convert(record)
37
+ record.struct.inject([]) { |terms, (field, struct)|
38
+ terms << struct[:elements].inject([]) { |array, element|
39
+ array += (struct[:values][element] || []).map { |v|
40
+ (v || '').strip.gsub(/(?:\r?\n)+/, ' ')
41
+ }.reject { |v| v.empty? }
42
+ }
43
+ }
44
+ end
55
45
 
56
- def check_number_of_arguments(expected, actual, blow = false, &block)
57
- return true if block ? block[actual] : expected == actual
46
+ def deferred?
47
+ true
48
+ end
58
49
 
59
- msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
50
+ private
60
51
 
61
- if blow
62
- raise FormatArgumentError, msg
63
- else
64
- warn msg
65
- return false
66
- end
67
- end
52
+ def check_number_of_arguments(expected, actual, blow = false, &block)
53
+ return true if block ? block[actual] : expected == actual
68
54
 
69
- def check_number_of_arguments!(expected, actual, &block)
70
- check_number_of_arguments(expected, actual, true, &block)
71
- end
55
+ msg = "wrong number of arguments for #{self} (#{actual} for #{expected})"
72
56
 
57
+ if blow
58
+ raise FormatArgumentError, msg
59
+ else
60
+ warn msg
61
+ return false
73
62
  end
63
+ end
74
64
 
65
+ def check_number_of_arguments!(expected, actual, &block)
66
+ check_number_of_arguments(expected, actual, true, &block)
75
67
  end
76
68
 
77
69
  # "Nasenbär\n"
78
- class SingleWord < Athena::Formats::Lingo::Base
79
-
80
- register_formats :out, 'lingo/single_word'
70
+ register_format! :out, 'lingo/single_word' do
81
71
 
82
- def self.convert(record)
72
+ def convert(record)
83
73
  super.flatten
84
74
  end
85
75
 
86
76
  end
87
77
 
88
78
  # "John Vorhauer*Vorhauer, John\n"
89
- class KeyValue < Athena::Formats::Lingo::Base
79
+ register_format! :out, 'lingo/key_value' do
90
80
 
91
- register_formats :out, 'lingo/key_value'
92
-
93
- def self.convert(record)
81
+ def convert(record)
94
82
  super.map { |terms|
95
83
  next unless check_number_of_arguments(2, terms.size)
96
84
 
@@ -101,11 +89,9 @@ class Athena::Formats
101
89
  end
102
90
 
103
91
  # "Essen,essen #v Essen #s Esse #s\n"
104
- class WordClass < Athena::Formats::Lingo::Base
105
-
106
- register_formats :out, 'lingo/word_class'
92
+ register_format! :out, 'lingo/word_class' do
107
93
 
108
- def self.convert(record)
94
+ def convert(record)
109
95
  super.map { |terms|
110
96
  next unless check_number_of_arguments('odd, > 1', terms.size) { |actual|
111
97
  actual > 1 && actual % 2 == 1
@@ -120,11 +106,9 @@ class Athena::Formats
120
106
  end
121
107
 
122
108
  # "Fax;Faxkopie;Telefax\n"
123
- class MultiValue < Athena::Formats::Lingo::Base
124
-
125
- register_formats :out, 'lingo/multi_value', 'lingo/multi_key'
109
+ register_format! :out, 'lingo/multi_value', 'lingo/multi_key' do
126
110
 
127
- def self.convert(record)
111
+ def convert(record)
128
112
  super.map { |terms|
129
113
  next unless check_number_of_arguments('> 1', terms.size) { |actual|
130
114
  actual > 1
@@ -26,31 +26,33 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- class Athena::Formats
29
+ module Athena::Formats
30
30
 
31
- class Sisis < Athena::Formats
31
+ class Sisis < Base
32
32
 
33
- register_format :in, 'sisis'
33
+ register_format :in do
34
34
 
35
- attr_reader :record_element, :config, :parser
35
+ attr_reader :record_element, :config, :parser
36
36
 
37
- def initialize(parser)
38
- config = parser.config.dup
37
+ def initialize(parser)
38
+ config = parser.config.dup
39
39
 
40
- case @record_element = config.delete(:__record_element)
41
- when String
42
- # fine!
43
- when nil
44
- raise NoRecordElementError, 'no record element specified'
45
- else
46
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
40
+ case @record_element = config.delete(:__record_element)
41
+ when String
42
+ # fine!
43
+ when nil
44
+ raise NoRecordElementError, 'no record element specified'
45
+ else
46
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
47
+ end
48
+
49
+ @config = config
50
+ @parser = parser
47
51
  end
48
52
 
49
- @config = config
50
- @parser = parser
51
53
  end
52
54
 
53
- def parse(source)
55
+ def parse(source, &block)
54
56
  record, num = nil, 0
55
57
 
56
58
  source.each { |line|
@@ -59,7 +61,7 @@ class Athena::Formats
59
61
  case element
60
62
  when record_element
61
63
  record.close if record
62
- record = Athena::Record.new(parser.block, value)
64
+ record = Athena::Record.new(value, block)
63
65
  num += 1
64
66
  else
65
67
  record.update(element, value, config[element])
@@ -33,40 +33,47 @@ require 'rubygems'
33
33
  require 'xmlstreamin'
34
34
  require 'nuggets/hash/insert'
35
35
 
36
- class Athena::Formats
36
+ module Athena::Formats
37
37
 
38
- class XML < Athena::Formats
38
+ class XML < Base
39
39
 
40
40
  include Athena::Util
41
41
 
42
- register_format :in, 'xml'
42
+ register_format :in do
43
43
 
44
- attr_reader :spec, :listener
44
+ attr_reader :specs, :record_element
45
+
46
+ def initialize(parser)
47
+ @specs = setup_specs(parser.config.dup)
48
+ end
45
49
 
46
- def initialize(parser)
47
- @spec = build_spec(parser)
48
- @listener = XMLStreamin::XMLStreamListener.new(@spec)
49
50
  end
50
51
 
51
- def parse(source)
52
- REXML::Document.parse_stream(source, listener)
52
+ def parse(source, &block)
53
+ REXML::Document.parse_stream(source, listener(&block))
53
54
  end
54
55
 
55
- private
56
+ =begin
57
+ register_format :out
58
+
59
+ def convert(record)
60
+ # ...
61
+ end
62
+ =end
56
63
 
57
- def build_spec(parser)
58
- config = parser.config.dup
64
+ private
59
65
 
60
- case record_element = config.delete(:__record_element)
66
+ def setup_specs(config)
67
+ case @record_element = config.delete(:__record_element)
61
68
  when String
62
69
  # fine!
63
70
  when nil
64
71
  raise NoRecordElementError, 'no record element specified'
65
72
  else
66
- raise IllegalRecordElementError, "illegal record element #{record_element.inspect}"
73
+ raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
67
74
  end
68
75
 
69
- element_specs = config.inject({}) { |specs, (element, element_spec)|
76
+ config.inject({}) { |specs, (element, element_spec)|
70
77
  element_spec.each { |field, c|
71
78
  element.split('/').reverse.inject({}) { |hash, part|
72
79
  s = define_spec(element, field, c, hash.empty? ? :default : hash)
@@ -78,9 +85,11 @@ class Athena::Formats
78
85
 
79
86
  specs
80
87
  }
88
+ end
81
89
 
82
- record_spec = RecordSpec.new(parser)
83
- record_spec.specs!(element_specs)
90
+ def listener(&block)
91
+ record_spec = RecordSpec.new(&block)
92
+ record_spec.specs!(specs)
84
93
 
85
94
  root_spec = BaseSpec.new
86
95
  root_spec.specs!(record_element => record_spec)
@@ -92,7 +101,7 @@ class Athena::Formats
92
101
  spec.inspect_spec
93
102
  end
94
103
 
95
- spec
104
+ XMLStreamin::XMLStreamListener.new(spec)
96
105
  end
97
106
 
98
107
  def define_spec(element, field, config, arg)
@@ -197,19 +206,19 @@ class Athena::Formats
197
206
 
198
207
  class RecordSpec < BaseSpec
199
208
 
200
- attr_reader :parser
209
+ attr_reader :block
201
210
  attr_accessor :record
202
211
 
203
- def initialize(parser)
212
+ def initialize(&block)
204
213
  super()
205
214
 
206
- @parser = parser
215
+ @block = block
207
216
  end
208
217
 
209
218
  def start(context, name, attrs)
210
219
  super
211
220
 
212
- self.record = Athena::Record.new(parser.block, nil, true)
221
+ self.record = Athena::Record.new(nil, block, true)
213
222
  end
214
223
 
215
224
  def done(context, name)
@@ -33,8 +33,7 @@ class Athena::Parser
33
33
  DEFAULT_SEPARATOR = ', '
34
34
  DEFAULT_EMPTY = '<<EMPTY>>'
35
35
 
36
- attr_reader :config, :spec
37
- attr_accessor :block
36
+ attr_reader :config, :spec
38
37
 
39
38
  def initialize(config, spec)
40
39
  @config = build_config(config)
@@ -42,9 +41,7 @@ class Athena::Parser
42
41
  end
43
42
 
44
43
  def parse(source, &block)
45
- self.block = block
46
-
47
- res = spec.parse(source)
44
+ res = spec.parse(source, &block)
48
45
  res.is_a?(Numeric) ? res : Athena::Record.records
49
46
  end
50
47
 
@@ -50,10 +50,10 @@ class Athena::Record
50
50
 
51
51
  attr_reader :struct, :block, :id
52
52
 
53
- def initialize(block, id = nil, _add_record = !block)
54
- @struct = {}
55
- @block = block
53
+ def initialize(id = nil, block = nil, _add_record = !block)
56
54
  @id = id || object_id.abs
55
+ @block = block
56
+ @struct = {}
57
57
 
58
58
  add_record if _add_record
59
59
 
@@ -29,8 +29,8 @@
29
29
  module Athena::Version
30
30
 
31
31
  MAJOR = 0
32
- MINOR = 0
33
- TINY = 9
32
+ MINOR = 1
33
+ TINY = 0
34
34
 
35
35
  class << self
36
36
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: athena
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-21 00:00:00 +02:00
12
+ date: 2009-08-24 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -68,15 +68,15 @@ licenses: []
68
68
 
69
69
  post_install_message:
70
70
  rdoc_options:
71
+ - --line-numbers
71
72
  - --main
72
73
  - README
73
- - --line-numbers
74
74
  - --inline-source
75
75
  - --title
76
76
  - athena Application documentation
77
- - --all
78
77
  - --charset
79
78
  - UTF-8
79
+ - --all
80
80
  require_paths:
81
81
  - lib
82
82
  required_ruby_version: !ruby/object:Gem::Requirement