athena 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,8 +38,6 @@ module Athena::Formats
38
38
 
39
39
  ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
40
40
 
41
- register_format :out, 'midos'
42
-
43
41
  def convert(record)
44
42
  dbm = ["ID:#{record.id}"]
45
43
 
@@ -70,4 +68,6 @@ module Athena::Formats
70
68
 
71
69
  end
72
70
 
71
+ Midos = DBM
72
+
73
73
  end
@@ -43,27 +43,8 @@ module Athena::Formats
43
43
 
44
44
  class Ferret < Base
45
45
 
46
- register_format :in do
47
-
48
- attr_reader :record_element, :config, :match_all_query
49
-
50
- def initialize(parser)
51
- @config = parser.config.dup
52
-
53
- case @record_element = @config.delete(:__record_element)
54
- when String
55
- # fine!
56
- when nil
57
- raise NoRecordElementError, 'no record element specified'
58
- else
59
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
60
- end
61
- end
62
-
63
- end if Object.const_defined?(:Ferret)
64
-
65
- def parse(source, &block)
66
- path = source.path
46
+ def parse(input, &block)
47
+ path = input.path
67
48
 
68
49
  # make sure the index can be opened
69
50
  begin
@@ -100,6 +81,8 @@ module Athena::Formats
100
81
  index.num_docs
101
82
  end
102
83
 
84
+ private :parse unless Object.const_defined?(:Ferret)
85
+
103
86
  end
104
87
 
105
88
  end
@@ -34,51 +34,8 @@ module Athena::Formats
34
34
 
35
35
  class Lingo < Base
36
36
 
37
- KV_SEPARATOR = '*'
38
- WC_SEPARATOR = ','
39
- MV_SEPARATOR = ';'
40
-
41
- def convert(record)
42
- terms = []
43
-
44
- record.struct.each { |field, struct|
45
- struct_values = struct[:values]
46
- struct_values.default = []
47
-
48
- values = []
49
-
50
- struct[:elements].each { |element|
51
- struct_values[element].each { |value|
52
- if value
53
- value = value.strip.gsub(CRLF_RE, ' ')
54
- values << value unless value.empty?
55
- end
56
- }
57
- }
58
-
59
- terms << values
60
- }
61
-
62
- terms
63
- end
64
-
65
- def deferred?
66
- true
67
- end
68
-
69
- private
70
-
71
- def check_args(expected, actual, &block)
72
- if block ? block[actual] : expected == actual
73
- true
74
- else
75
- warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
76
- false
77
- end
78
- end
79
-
80
- # "Nasenbär\n"
81
- register_format! :out, 'lingo/single_word' do
37
+ # "Nasenbär"
38
+ class SingleWord < Lingo
82
39
 
83
40
  def convert(record)
84
41
  super.flatten
@@ -86,25 +43,29 @@ module Athena::Formats
86
43
 
87
44
  end
88
45
 
89
- # "John Vorhauer*Vorhauer, John\n"
90
- register_format! :out, 'lingo/key_value' do
46
+ # "John Vorhauer*Vorhauer, John"
47
+ class KeyValue < Lingo
48
+
49
+ SEPARATOR = '*'
91
50
 
92
51
  def convert(record)
93
52
  super.map { |terms|
94
- terms.join(KV_SEPARATOR) if check_args(2, terms.size)
53
+ terms.join(SEPARATOR) if check_args(2, terms.size)
95
54
  }.compact
96
55
  end
97
56
 
98
57
  end
99
58
 
100
- # "Essen,essen #v Essen #s Esse #s\n"
101
- register_format! :out, 'lingo/word_class' do
59
+ # "Essen,essen #v Essen #s Esse #s"
60
+ class WordClass < Lingo
61
+
62
+ SEPARATOR = ','
102
63
 
103
64
  def convert(record)
104
65
  super.map { |terms|
105
66
  [ terms.shift,
106
67
  terms.to_enum(:each_slice, 2).map { |w, c| "#{w} ##{c}" }.join(' ')
107
- ].join(WC_SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
68
+ ].join(SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
108
69
  actual > 1 && actual % 2 == 1
109
70
  }
110
71
  }.compact
@@ -112,12 +73,14 @@ module Athena::Formats
112
73
 
113
74
  end
114
75
 
115
- # "Fax;Faxkopie;Telefax\n"
116
- register_format! :out, 'lingo/multi_value', 'lingo/multi_key' do
76
+ # "Fax;Faxkopie;Telefax"
77
+ class MultiValue < Lingo
78
+
79
+ SEPARATOR = ';'
117
80
 
118
81
  def convert(record)
119
82
  super.map { |terms|
120
- terms.join(MV_SEPARATOR) if check_args('> 1', terms.size) { |actual|
83
+ terms.join(SEPARATOR) if check_args('> 1', terms.size) { |actual|
121
84
  actual > 1
122
85
  }
123
86
  }.compact
@@ -125,6 +88,47 @@ module Athena::Formats
125
88
 
126
89
  end
127
90
 
91
+ MultiKey = MultiValue
92
+
93
+ def deferred?
94
+ true
95
+ end
96
+
97
+ private
98
+
99
+ def convert(record)
100
+ terms = []
101
+
102
+ record.struct.each { |field, struct|
103
+ struct_values = struct[:values]
104
+ struct_values.default = []
105
+
106
+ values = []
107
+
108
+ struct[:elements].each { |element|
109
+ struct_values[element].each { |value|
110
+ if value
111
+ value = value.strip.gsub(CRLF_RE, ' ')
112
+ values << value unless value.empty?
113
+ end
114
+ }
115
+ }
116
+
117
+ terms << values
118
+ }
119
+
120
+ terms
121
+ end
122
+
123
+ def check_args(expected, actual, &block)
124
+ if block ? block[actual] : expected == actual
125
+ true
126
+ else
127
+ warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
128
+ false
129
+ end
130
+ end
131
+
128
132
  end
129
133
 
130
134
  end
@@ -34,29 +34,10 @@ module Athena::Formats
34
34
 
35
35
  RECORD_RE = %r{(\d+).*?:\s*(.*)}
36
36
 
37
- register_format :in do
38
-
39
- attr_reader :record_element, :config
40
-
41
- def initialize(parser)
42
- @config = parser.config.dup
43
-
44
- case @record_element = @config.delete(:__record_element)
45
- when String
46
- # fine!
47
- when nil
48
- raise NoRecordElementError, 'no record element specified'
49
- else
50
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
51
- end
52
- end
53
-
54
- end
55
-
56
- def parse(source, &block)
37
+ def parse(input, &block)
57
38
  record, num = nil, 0
58
39
 
59
- source.each { |line|
40
+ input.each { |line|
60
41
  element, value = line.match(RECORD_RE)[1, 2]
61
42
 
62
43
  if element == record_element
@@ -33,29 +33,12 @@ module Athena::Formats
33
33
 
34
34
  class MYSQL < Base
35
35
 
36
- register_format :in do
36
+ attr_reader :sql_parser
37
37
 
38
- attr_reader :record_element, :config, :sql_parser
39
-
40
- def initialize(parser)
41
- @config = parser.config.dup
42
-
43
- case @record_element = @config.delete(:__record_element)
44
- when String, nil
45
- # fine!
46
- else
47
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
48
- end
49
-
50
- @sql_parser = SQLParser.new
51
- end
52
-
53
- end
54
-
55
- def parse(source, &block)
38
+ def parse(input, &block)
56
39
  columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
57
40
 
58
- source.each { |line|
41
+ input.each { |line|
59
42
  case line = line.chomp
60
43
  when /\ACREATE\s+TABLE\s+`(.+?)`/i
61
44
  table = $1
@@ -88,6 +71,14 @@ module Athena::Formats
88
71
  num
89
72
  end
90
73
 
74
+ private
75
+
76
+ def init_in(*)
77
+ @__record_element_ok__ = [String, nil]
78
+ super
79
+ @sql_parser = SQLParser.new
80
+ end
81
+
91
82
  class SQLParser
92
83
 
93
84
  AST = Struct.new(:value)
@@ -104,7 +95,7 @@ module Athena::Formats
104
95
  while result = parse_row
105
96
  row = result.value
106
97
  block_given ? yield(row) : rows << row
107
- break unless @input.scan(/,/)
98
+ break unless @input.scan(/,\s*/)
108
99
  end
109
100
 
110
101
  @input.scan(/;/) # optional
@@ -114,8 +105,6 @@ module Athena::Formats
114
105
  rows unless block_given
115
106
  end
116
107
 
117
- private
118
-
119
108
  def parse_row
120
109
  return unless @input.scan(/\(/)
121
110
 
@@ -123,7 +112,7 @@ module Athena::Formats
123
112
 
124
113
  while result = parse_value
125
114
  row << result.value
126
- break unless @input.scan(/,/)
115
+ break unless @input.scan(/,\s*/)
127
116
  end
128
117
 
129
118
  error('Unclosed row') unless @input.scan(/\)/)
@@ -191,27 +180,10 @@ module Athena::Formats
191
180
 
192
181
  class PGSQL < Base
193
182
 
194
- register_format :in do
195
-
196
- attr_reader :record_element, :config
197
-
198
- def initialize(parser)
199
- @config = parser.config.dup
200
-
201
- case @record_element = @config.delete(:__record_element)
202
- when String, nil
203
- # fine!
204
- else
205
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
206
- end
207
- end
208
-
209
- end
210
-
211
- def parse(source, &block)
183
+ def parse(input, &block)
212
184
  columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
213
185
 
214
- source.each { |line|
186
+ input.each { |line|
215
187
  case line = line.chomp
216
188
  when /\ACOPY\s+(\S+)\s+\((.+?)\)\s+FROM\s+stdin;\z/i
217
189
  columns[table = $1] = $2.split(/\s*,\s*/)
@@ -242,9 +214,16 @@ module Athena::Formats
242
214
  num
243
215
  end
244
216
 
217
+ private
218
+
219
+ def init_in(*)
220
+ @__record_element_ok__ = [String, nil]
221
+ super
222
+ end
223
+
245
224
  end
246
225
 
247
- MySQL = MYSQL
248
- PgSQL = PGSQL
226
+ class MySQL < MYSQL; private :parse; end
227
+ class PgSQL < PGSQL; private :parse; end
249
228
 
250
229
  end
@@ -42,22 +42,13 @@ module Athena::Formats
42
42
 
43
43
  VALUE_SEPARATOR = '|'
44
44
 
45
- register_format :in do
46
-
47
- attr_reader :specs, :record_element
48
-
49
- def initialize(parser)
50
- @specs = setup_specs(parser.config.dup)
51
- end
45
+ attr_reader :specs
52
46
 
47
+ def parse(input, &block)
48
+ REXML::Document.parse_stream(input, listener(&block))
49
+ Athena::Record.records.size
53
50
  end
54
51
 
55
- def parse(source, &block)
56
- REXML::Document.parse_stream(source, listener(&block))
57
- end
58
-
59
- register_format :out
60
-
61
52
  def convert(record)
62
53
  builder.row {
63
54
  builder.id record.id
@@ -79,7 +70,7 @@ module Athena::Formats
79
70
  }
80
71
  end
81
72
 
82
- register_format! :out, 'xml/flat' do
73
+ class Flat < XML
83
74
 
84
75
  def convert(record)
85
76
  super { |field, struct|
@@ -99,18 +90,43 @@ module Athena::Formats
99
90
 
100
91
  end
101
92
 
102
- def wrap(out = nil)
103
- res = nil
104
- builder(:target => out).database { res = super() }
105
- res
106
- end
107
-
108
93
  def raw?
109
94
  true
110
95
  end
111
96
 
112
97
  private
113
98
 
99
+ def init_in(*)
100
+ @__record_element_ok__ = [String, Array]
101
+ super
102
+
103
+ case @skip_hierarchy = @config.delete(:__skip_hierarchy)
104
+ when Integer
105
+ # fine!
106
+ when nil
107
+ @skip_hierarchy = 0
108
+ else
109
+ raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
110
+ end
111
+
112
+ @specs = {}
113
+
114
+ @config.each { |element, element_spec| element_spec.each { |field, c|
115
+ element.split('/').reverse.inject({}) { |hash, part|
116
+ s = define_spec(element, field, c, hash.empty? ? :default : hash)
117
+ merge_specs(hash, part, s)
118
+ }.each { |key, s|
119
+ merge_specs(@specs, key, s)
120
+ }
121
+ } }
122
+ end
123
+
124
+ def wrap
125
+ res = nil
126
+ builder(:target => output).database { res = super() }
127
+ res
128
+ end
129
+
114
130
  def builder(options = {})
115
131
  @builder ||= begin
116
132
  builder = Builder::XmlMarkup.new({ :indent => 2 }.merge(options))
@@ -129,39 +145,6 @@ module Athena::Formats
129
145
  end
130
146
  end
131
147
 
132
- def setup_specs(config)
133
- case @record_element = config.delete(:__record_element)
134
- when String, Array
135
- # fine!
136
- when nil
137
- raise NoRecordElementError, 'no record element specified'
138
- else
139
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
140
- end
141
-
142
- case @skip_hierarchy = config.delete(:__skip_hierarchy)
143
- when Integer
144
- # fine!
145
- when nil
146
- @skip_hierarchy = 0
147
- else
148
- raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
149
- end
150
-
151
- config.inject({}) { |specs, (element, element_spec)|
152
- element_spec.each { |field, c|
153
- element.split('/').reverse.inject({}) { |hash, part|
154
- s = define_spec(element, field, c, hash.empty? ? :default : hash)
155
- merge_specs(hash, part, s)
156
- }.each { |key, s|
157
- merge_specs(specs, key, s)
158
- }
159
- }
160
-
161
- specs
162
- }
163
- end
164
-
165
148
  def listener(&block)
166
149
  record_spec = RecordSpec.new(&block)
167
150
  record_spec.specs!(specs)