athena 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -38,8 +38,6 @@ module Athena::Formats
38
38
 
39
39
  ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
40
40
 
41
- register_format :out, 'midos'
42
-
43
41
  def convert(record)
44
42
  dbm = ["ID:#{record.id}"]
45
43
 
@@ -70,4 +68,6 @@ module Athena::Formats
70
68
 
71
69
  end
72
70
 
71
+ Midos = DBM
72
+
73
73
  end
@@ -43,27 +43,8 @@ module Athena::Formats
43
43
 
44
44
  class Ferret < Base
45
45
 
46
- register_format :in do
47
-
48
- attr_reader :record_element, :config, :match_all_query
49
-
50
- def initialize(parser)
51
- @config = parser.config.dup
52
-
53
- case @record_element = @config.delete(:__record_element)
54
- when String
55
- # fine!
56
- when nil
57
- raise NoRecordElementError, 'no record element specified'
58
- else
59
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
60
- end
61
- end
62
-
63
- end if Object.const_defined?(:Ferret)
64
-
65
- def parse(source, &block)
66
- path = source.path
46
+ def parse(input, &block)
47
+ path = input.path
67
48
 
68
49
  # make sure the index can be opened
69
50
  begin
@@ -100,6 +81,8 @@ module Athena::Formats
100
81
  index.num_docs
101
82
  end
102
83
 
84
+ private :parse unless Object.const_defined?(:Ferret)
85
+
103
86
  end
104
87
 
105
88
  end
@@ -34,51 +34,8 @@ module Athena::Formats
34
34
 
35
35
  class Lingo < Base
36
36
 
37
- KV_SEPARATOR = '*'
38
- WC_SEPARATOR = ','
39
- MV_SEPARATOR = ';'
40
-
41
- def convert(record)
42
- terms = []
43
-
44
- record.struct.each { |field, struct|
45
- struct_values = struct[:values]
46
- struct_values.default = []
47
-
48
- values = []
49
-
50
- struct[:elements].each { |element|
51
- struct_values[element].each { |value|
52
- if value
53
- value = value.strip.gsub(CRLF_RE, ' ')
54
- values << value unless value.empty?
55
- end
56
- }
57
- }
58
-
59
- terms << values
60
- }
61
-
62
- terms
63
- end
64
-
65
- def deferred?
66
- true
67
- end
68
-
69
- private
70
-
71
- def check_args(expected, actual, &block)
72
- if block ? block[actual] : expected == actual
73
- true
74
- else
75
- warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
76
- false
77
- end
78
- end
79
-
80
- # "Nasenbär\n"
81
- register_format! :out, 'lingo/single_word' do
37
+ # "Nasenbär"
38
+ class SingleWord < Lingo
82
39
 
83
40
  def convert(record)
84
41
  super.flatten
@@ -86,25 +43,29 @@ module Athena::Formats
86
43
 
87
44
  end
88
45
 
89
- # "John Vorhauer*Vorhauer, John\n"
90
- register_format! :out, 'lingo/key_value' do
46
+ # "John Vorhauer*Vorhauer, John"
47
+ class KeyValue < Lingo
48
+
49
+ SEPARATOR = '*'
91
50
 
92
51
  def convert(record)
93
52
  super.map { |terms|
94
- terms.join(KV_SEPARATOR) if check_args(2, terms.size)
53
+ terms.join(SEPARATOR) if check_args(2, terms.size)
95
54
  }.compact
96
55
  end
97
56
 
98
57
  end
99
58
 
100
- # "Essen,essen #v Essen #s Esse #s\n"
101
- register_format! :out, 'lingo/word_class' do
59
+ # "Essen,essen #v Essen #s Esse #s"
60
+ class WordClass < Lingo
61
+
62
+ SEPARATOR = ','
102
63
 
103
64
  def convert(record)
104
65
  super.map { |terms|
105
66
  [ terms.shift,
106
67
  terms.to_enum(:each_slice, 2).map { |w, c| "#{w} ##{c}" }.join(' ')
107
- ].join(WC_SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
68
+ ].join(SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
108
69
  actual > 1 && actual % 2 == 1
109
70
  }
110
71
  }.compact
@@ -112,12 +73,14 @@ module Athena::Formats
112
73
 
113
74
  end
114
75
 
115
- # "Fax;Faxkopie;Telefax\n"
116
- register_format! :out, 'lingo/multi_value', 'lingo/multi_key' do
76
+ # "Fax;Faxkopie;Telefax"
77
+ class MultiValue < Lingo
78
+
79
+ SEPARATOR = ';'
117
80
 
118
81
  def convert(record)
119
82
  super.map { |terms|
120
- terms.join(MV_SEPARATOR) if check_args('> 1', terms.size) { |actual|
83
+ terms.join(SEPARATOR) if check_args('> 1', terms.size) { |actual|
121
84
  actual > 1
122
85
  }
123
86
  }.compact
@@ -125,6 +88,47 @@ module Athena::Formats
125
88
 
126
89
  end
127
90
 
91
+ MultiKey = MultiValue
92
+
93
+ def deferred?
94
+ true
95
+ end
96
+
97
+ private
98
+
99
+ def convert(record)
100
+ terms = []
101
+
102
+ record.struct.each { |field, struct|
103
+ struct_values = struct[:values]
104
+ struct_values.default = []
105
+
106
+ values = []
107
+
108
+ struct[:elements].each { |element|
109
+ struct_values[element].each { |value|
110
+ if value
111
+ value = value.strip.gsub(CRLF_RE, ' ')
112
+ values << value unless value.empty?
113
+ end
114
+ }
115
+ }
116
+
117
+ terms << values
118
+ }
119
+
120
+ terms
121
+ end
122
+
123
+ def check_args(expected, actual, &block)
124
+ if block ? block[actual] : expected == actual
125
+ true
126
+ else
127
+ warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
128
+ false
129
+ end
130
+ end
131
+
128
132
  end
129
133
 
130
134
  end
@@ -34,29 +34,10 @@ module Athena::Formats
34
34
 
35
35
  RECORD_RE = %r{(\d+).*?:\s*(.*)}
36
36
 
37
- register_format :in do
38
-
39
- attr_reader :record_element, :config
40
-
41
- def initialize(parser)
42
- @config = parser.config.dup
43
-
44
- case @record_element = @config.delete(:__record_element)
45
- when String
46
- # fine!
47
- when nil
48
- raise NoRecordElementError, 'no record element specified'
49
- else
50
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
51
- end
52
- end
53
-
54
- end
55
-
56
- def parse(source, &block)
37
+ def parse(input, &block)
57
38
  record, num = nil, 0
58
39
 
59
- source.each { |line|
40
+ input.each { |line|
60
41
  element, value = line.match(RECORD_RE)[1, 2]
61
42
 
62
43
  if element == record_element
@@ -33,29 +33,12 @@ module Athena::Formats
33
33
 
34
34
  class MYSQL < Base
35
35
 
36
- register_format :in do
36
+ attr_reader :sql_parser
37
37
 
38
- attr_reader :record_element, :config, :sql_parser
39
-
40
- def initialize(parser)
41
- @config = parser.config.dup
42
-
43
- case @record_element = @config.delete(:__record_element)
44
- when String, nil
45
- # fine!
46
- else
47
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
48
- end
49
-
50
- @sql_parser = SQLParser.new
51
- end
52
-
53
- end
54
-
55
- def parse(source, &block)
38
+ def parse(input, &block)
56
39
  columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
57
40
 
58
- source.each { |line|
41
+ input.each { |line|
59
42
  case line = line.chomp
60
43
  when /\ACREATE\s+TABLE\s+`(.+?)`/i
61
44
  table = $1
@@ -88,6 +71,14 @@ module Athena::Formats
88
71
  num
89
72
  end
90
73
 
74
+ private
75
+
76
+ def init_in(*)
77
+ @__record_element_ok__ = [String, nil]
78
+ super
79
+ @sql_parser = SQLParser.new
80
+ end
81
+
91
82
  class SQLParser
92
83
 
93
84
  AST = Struct.new(:value)
@@ -104,7 +95,7 @@ module Athena::Formats
104
95
  while result = parse_row
105
96
  row = result.value
106
97
  block_given ? yield(row) : rows << row
107
- break unless @input.scan(/,/)
98
+ break unless @input.scan(/,\s*/)
108
99
  end
109
100
 
110
101
  @input.scan(/;/) # optional
@@ -114,8 +105,6 @@ module Athena::Formats
114
105
  rows unless block_given
115
106
  end
116
107
 
117
- private
118
-
119
108
  def parse_row
120
109
  return unless @input.scan(/\(/)
121
110
 
@@ -123,7 +112,7 @@ module Athena::Formats
123
112
 
124
113
  while result = parse_value
125
114
  row << result.value
126
- break unless @input.scan(/,/)
115
+ break unless @input.scan(/,\s*/)
127
116
  end
128
117
 
129
118
  error('Unclosed row') unless @input.scan(/\)/)
@@ -191,27 +180,10 @@ module Athena::Formats
191
180
 
192
181
  class PGSQL < Base
193
182
 
194
- register_format :in do
195
-
196
- attr_reader :record_element, :config
197
-
198
- def initialize(parser)
199
- @config = parser.config.dup
200
-
201
- case @record_element = @config.delete(:__record_element)
202
- when String, nil
203
- # fine!
204
- else
205
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
206
- end
207
- end
208
-
209
- end
210
-
211
- def parse(source, &block)
183
+ def parse(input, &block)
212
184
  columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
213
185
 
214
- source.each { |line|
186
+ input.each { |line|
215
187
  case line = line.chomp
216
188
  when /\ACOPY\s+(\S+)\s+\((.+?)\)\s+FROM\s+stdin;\z/i
217
189
  columns[table = $1] = $2.split(/\s*,\s*/)
@@ -242,9 +214,16 @@ module Athena::Formats
242
214
  num
243
215
  end
244
216
 
217
+ private
218
+
219
+ def init_in(*)
220
+ @__record_element_ok__ = [String, nil]
221
+ super
222
+ end
223
+
245
224
  end
246
225
 
247
- MySQL = MYSQL
248
- PgSQL = PGSQL
226
+ class MySQL < MYSQL; private :parse; end
227
+ class PgSQL < PGSQL; private :parse; end
249
228
 
250
229
  end
@@ -42,22 +42,13 @@ module Athena::Formats
42
42
 
43
43
  VALUE_SEPARATOR = '|'
44
44
 
45
- register_format :in do
46
-
47
- attr_reader :specs, :record_element
48
-
49
- def initialize(parser)
50
- @specs = setup_specs(parser.config.dup)
51
- end
45
+ attr_reader :specs
52
46
 
47
+ def parse(input, &block)
48
+ REXML::Document.parse_stream(input, listener(&block))
49
+ Athena::Record.records.size
53
50
  end
54
51
 
55
- def parse(source, &block)
56
- REXML::Document.parse_stream(source, listener(&block))
57
- end
58
-
59
- register_format :out
60
-
61
52
  def convert(record)
62
53
  builder.row {
63
54
  builder.id record.id
@@ -79,7 +70,7 @@ module Athena::Formats
79
70
  }
80
71
  end
81
72
 
82
- register_format! :out, 'xml/flat' do
73
+ class Flat < XML
83
74
 
84
75
  def convert(record)
85
76
  super { |field, struct|
@@ -99,18 +90,43 @@ module Athena::Formats
99
90
 
100
91
  end
101
92
 
102
- def wrap(out = nil)
103
- res = nil
104
- builder(:target => out).database { res = super() }
105
- res
106
- end
107
-
108
93
  def raw?
109
94
  true
110
95
  end
111
96
 
112
97
  private
113
98
 
99
+ def init_in(*)
100
+ @__record_element_ok__ = [String, Array]
101
+ super
102
+
103
+ case @skip_hierarchy = @config.delete(:__skip_hierarchy)
104
+ when Integer
105
+ # fine!
106
+ when nil
107
+ @skip_hierarchy = 0
108
+ else
109
+ raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
110
+ end
111
+
112
+ @specs = {}
113
+
114
+ @config.each { |element, element_spec| element_spec.each { |field, c|
115
+ element.split('/').reverse.inject({}) { |hash, part|
116
+ s = define_spec(element, field, c, hash.empty? ? :default : hash)
117
+ merge_specs(hash, part, s)
118
+ }.each { |key, s|
119
+ merge_specs(@specs, key, s)
120
+ }
121
+ } }
122
+ end
123
+
124
+ def wrap
125
+ res = nil
126
+ builder(:target => output).database { res = super() }
127
+ res
128
+ end
129
+
114
130
  def builder(options = {})
115
131
  @builder ||= begin
116
132
  builder = Builder::XmlMarkup.new({ :indent => 2 }.merge(options))
@@ -129,39 +145,6 @@ module Athena::Formats
129
145
  end
130
146
  end
131
147
 
132
- def setup_specs(config)
133
- case @record_element = config.delete(:__record_element)
134
- when String, Array
135
- # fine!
136
- when nil
137
- raise NoRecordElementError, 'no record element specified'
138
- else
139
- raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
140
- end
141
-
142
- case @skip_hierarchy = config.delete(:__skip_hierarchy)
143
- when Integer
144
- # fine!
145
- when nil
146
- @skip_hierarchy = 0
147
- else
148
- raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
149
- end
150
-
151
- config.inject({}) { |specs, (element, element_spec)|
152
- element_spec.each { |field, c|
153
- element.split('/').reverse.inject({}) { |hash, part|
154
- s = define_spec(element, field, c, hash.empty? ? :default : hash)
155
- merge_specs(hash, part, s)
156
- }.each { |key, s|
157
- merge_specs(specs, key, s)
158
- }
159
- }
160
-
161
- specs
162
- }
163
- end
164
-
165
148
  def listener(&block)
166
149
  record_spec = RecordSpec.new(&block)
167
150
  record_spec.specs!(specs)