athena 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +12 -2
- data/Rakefile +1 -1
- data/bin/athena +1 -1
- data/example/athena_plugin.rb +13 -0
- data/lib/athena.rb +54 -26
- data/lib/athena/cli.rb +18 -41
- data/lib/athena/formats.rb +363 -53
- data/lib/athena/formats/dbm.rb +2 -2
- data/lib/athena/formats/ferret.rb +4 -21
- data/lib/athena/formats/lingo.rb +58 -54
- data/lib/athena/formats/sisis.rb +2 -21
- data/lib/athena/formats/sql.rb +24 -45
- data/lib/athena/formats/xml.rb +36 -53
- data/lib/athena/record.rb +1 -3
- data/lib/athena/version.rb +1 -1
- metadata +24 -24
- data/lib/athena/parser.rb +0 -90
data/lib/athena/formats/dbm.rb
CHANGED
@@ -38,8 +38,6 @@ module Athena::Formats
|
|
38
38
|
|
39
39
|
ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
|
40
40
|
|
41
|
-
register_format :out, 'midos'
|
42
|
-
|
43
41
|
def convert(record)
|
44
42
|
dbm = ["ID:#{record.id}"]
|
45
43
|
|
@@ -70,4 +68,6 @@ module Athena::Formats
|
|
70
68
|
|
71
69
|
end
|
72
70
|
|
71
|
+
Midos = DBM
|
72
|
+
|
73
73
|
end
|
@@ -43,27 +43,8 @@ module Athena::Formats
|
|
43
43
|
|
44
44
|
class Ferret < Base
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
attr_reader :record_element, :config, :match_all_query
|
49
|
-
|
50
|
-
def initialize(parser)
|
51
|
-
@config = parser.config.dup
|
52
|
-
|
53
|
-
case @record_element = @config.delete(:__record_element)
|
54
|
-
when String
|
55
|
-
# fine!
|
56
|
-
when nil
|
57
|
-
raise NoRecordElementError, 'no record element specified'
|
58
|
-
else
|
59
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
end if Object.const_defined?(:Ferret)
|
64
|
-
|
65
|
-
def parse(source, &block)
|
66
|
-
path = source.path
|
46
|
+
def parse(input, &block)
|
47
|
+
path = input.path
|
67
48
|
|
68
49
|
# make sure the index can be opened
|
69
50
|
begin
|
@@ -100,6 +81,8 @@ module Athena::Formats
|
|
100
81
|
index.num_docs
|
101
82
|
end
|
102
83
|
|
84
|
+
private :parse unless Object.const_defined?(:Ferret)
|
85
|
+
|
103
86
|
end
|
104
87
|
|
105
88
|
end
|
data/lib/athena/formats/lingo.rb
CHANGED
@@ -34,51 +34,8 @@ module Athena::Formats
|
|
34
34
|
|
35
35
|
class Lingo < Base
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
MV_SEPARATOR = ';'
|
40
|
-
|
41
|
-
def convert(record)
|
42
|
-
terms = []
|
43
|
-
|
44
|
-
record.struct.each { |field, struct|
|
45
|
-
struct_values = struct[:values]
|
46
|
-
struct_values.default = []
|
47
|
-
|
48
|
-
values = []
|
49
|
-
|
50
|
-
struct[:elements].each { |element|
|
51
|
-
struct_values[element].each { |value|
|
52
|
-
if value
|
53
|
-
value = value.strip.gsub(CRLF_RE, ' ')
|
54
|
-
values << value unless value.empty?
|
55
|
-
end
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
|
-
terms << values
|
60
|
-
}
|
61
|
-
|
62
|
-
terms
|
63
|
-
end
|
64
|
-
|
65
|
-
def deferred?
|
66
|
-
true
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def check_args(expected, actual, &block)
|
72
|
-
if block ? block[actual] : expected == actual
|
73
|
-
true
|
74
|
-
else
|
75
|
-
warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
|
76
|
-
false
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# "Nasenbär\n"
|
81
|
-
register_format! :out, 'lingo/single_word' do
|
37
|
+
# "Nasenbär"
|
38
|
+
class SingleWord < Lingo
|
82
39
|
|
83
40
|
def convert(record)
|
84
41
|
super.flatten
|
@@ -86,25 +43,29 @@ module Athena::Formats
|
|
86
43
|
|
87
44
|
end
|
88
45
|
|
89
|
-
# "John Vorhauer*Vorhauer, John
|
90
|
-
|
46
|
+
# "John Vorhauer*Vorhauer, John"
|
47
|
+
class KeyValue < Lingo
|
48
|
+
|
49
|
+
SEPARATOR = '*'
|
91
50
|
|
92
51
|
def convert(record)
|
93
52
|
super.map { |terms|
|
94
|
-
terms.join(
|
53
|
+
terms.join(SEPARATOR) if check_args(2, terms.size)
|
95
54
|
}.compact
|
96
55
|
end
|
97
56
|
|
98
57
|
end
|
99
58
|
|
100
|
-
# "Essen,essen #v Essen #s Esse #s
|
101
|
-
|
59
|
+
# "Essen,essen #v Essen #s Esse #s"
|
60
|
+
class WordClass < Lingo
|
61
|
+
|
62
|
+
SEPARATOR = ','
|
102
63
|
|
103
64
|
def convert(record)
|
104
65
|
super.map { |terms|
|
105
66
|
[ terms.shift,
|
106
67
|
terms.to_enum(:each_slice, 2).map { |w, c| "#{w} ##{c}" }.join(' ')
|
107
|
-
].join(
|
68
|
+
].join(SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
|
108
69
|
actual > 1 && actual % 2 == 1
|
109
70
|
}
|
110
71
|
}.compact
|
@@ -112,12 +73,14 @@ module Athena::Formats
|
|
112
73
|
|
113
74
|
end
|
114
75
|
|
115
|
-
# "Fax;Faxkopie;Telefax
|
116
|
-
|
76
|
+
# "Fax;Faxkopie;Telefax"
|
77
|
+
class MultiValue < Lingo
|
78
|
+
|
79
|
+
SEPARATOR = ';'
|
117
80
|
|
118
81
|
def convert(record)
|
119
82
|
super.map { |terms|
|
120
|
-
terms.join(
|
83
|
+
terms.join(SEPARATOR) if check_args('> 1', terms.size) { |actual|
|
121
84
|
actual > 1
|
122
85
|
}
|
123
86
|
}.compact
|
@@ -125,6 +88,47 @@ module Athena::Formats
|
|
125
88
|
|
126
89
|
end
|
127
90
|
|
91
|
+
MultiKey = MultiValue
|
92
|
+
|
93
|
+
def deferred?
|
94
|
+
true
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
def convert(record)
|
100
|
+
terms = []
|
101
|
+
|
102
|
+
record.struct.each { |field, struct|
|
103
|
+
struct_values = struct[:values]
|
104
|
+
struct_values.default = []
|
105
|
+
|
106
|
+
values = []
|
107
|
+
|
108
|
+
struct[:elements].each { |element|
|
109
|
+
struct_values[element].each { |value|
|
110
|
+
if value
|
111
|
+
value = value.strip.gsub(CRLF_RE, ' ')
|
112
|
+
values << value unless value.empty?
|
113
|
+
end
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
terms << values
|
118
|
+
}
|
119
|
+
|
120
|
+
terms
|
121
|
+
end
|
122
|
+
|
123
|
+
def check_args(expected, actual, &block)
|
124
|
+
if block ? block[actual] : expected == actual
|
125
|
+
true
|
126
|
+
else
|
127
|
+
warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
|
128
|
+
false
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
128
132
|
end
|
129
133
|
|
130
134
|
end
|
data/lib/athena/formats/sisis.rb
CHANGED
@@ -34,29 +34,10 @@ module Athena::Formats
|
|
34
34
|
|
35
35
|
RECORD_RE = %r{(\d+).*?:\s*(.*)}
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
attr_reader :record_element, :config
|
40
|
-
|
41
|
-
def initialize(parser)
|
42
|
-
@config = parser.config.dup
|
43
|
-
|
44
|
-
case @record_element = @config.delete(:__record_element)
|
45
|
-
when String
|
46
|
-
# fine!
|
47
|
-
when nil
|
48
|
-
raise NoRecordElementError, 'no record element specified'
|
49
|
-
else
|
50
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
def parse(source, &block)
|
37
|
+
def parse(input, &block)
|
57
38
|
record, num = nil, 0
|
58
39
|
|
59
|
-
|
40
|
+
input.each { |line|
|
60
41
|
element, value = line.match(RECORD_RE)[1, 2]
|
61
42
|
|
62
43
|
if element == record_element
|
data/lib/athena/formats/sql.rb
CHANGED
@@ -33,29 +33,12 @@ module Athena::Formats
|
|
33
33
|
|
34
34
|
class MYSQL < Base
|
35
35
|
|
36
|
-
|
36
|
+
attr_reader :sql_parser
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
def initialize(parser)
|
41
|
-
@config = parser.config.dup
|
42
|
-
|
43
|
-
case @record_element = @config.delete(:__record_element)
|
44
|
-
when String, nil
|
45
|
-
# fine!
|
46
|
-
else
|
47
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
48
|
-
end
|
49
|
-
|
50
|
-
@sql_parser = SQLParser.new
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse(source, &block)
|
38
|
+
def parse(input, &block)
|
56
39
|
columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
|
57
40
|
|
58
|
-
|
41
|
+
input.each { |line|
|
59
42
|
case line = line.chomp
|
60
43
|
when /\ACREATE\s+TABLE\s+`(.+?)`/i
|
61
44
|
table = $1
|
@@ -88,6 +71,14 @@ module Athena::Formats
|
|
88
71
|
num
|
89
72
|
end
|
90
73
|
|
74
|
+
private
|
75
|
+
|
76
|
+
def init_in(*)
|
77
|
+
@__record_element_ok__ = [String, nil]
|
78
|
+
super
|
79
|
+
@sql_parser = SQLParser.new
|
80
|
+
end
|
81
|
+
|
91
82
|
class SQLParser
|
92
83
|
|
93
84
|
AST = Struct.new(:value)
|
@@ -104,7 +95,7 @@ module Athena::Formats
|
|
104
95
|
while result = parse_row
|
105
96
|
row = result.value
|
106
97
|
block_given ? yield(row) : rows << row
|
107
|
-
break unless @input.scan(
|
98
|
+
break unless @input.scan(/,\s*/)
|
108
99
|
end
|
109
100
|
|
110
101
|
@input.scan(/;/) # optional
|
@@ -114,8 +105,6 @@ module Athena::Formats
|
|
114
105
|
rows unless block_given
|
115
106
|
end
|
116
107
|
|
117
|
-
private
|
118
|
-
|
119
108
|
def parse_row
|
120
109
|
return unless @input.scan(/\(/)
|
121
110
|
|
@@ -123,7 +112,7 @@ module Athena::Formats
|
|
123
112
|
|
124
113
|
while result = parse_value
|
125
114
|
row << result.value
|
126
|
-
break unless @input.scan(
|
115
|
+
break unless @input.scan(/,\s*/)
|
127
116
|
end
|
128
117
|
|
129
118
|
error('Unclosed row') unless @input.scan(/\)/)
|
@@ -191,27 +180,10 @@ module Athena::Formats
|
|
191
180
|
|
192
181
|
class PGSQL < Base
|
193
182
|
|
194
|
-
|
195
|
-
|
196
|
-
attr_reader :record_element, :config
|
197
|
-
|
198
|
-
def initialize(parser)
|
199
|
-
@config = parser.config.dup
|
200
|
-
|
201
|
-
case @record_element = @config.delete(:__record_element)
|
202
|
-
when String, nil
|
203
|
-
# fine!
|
204
|
-
else
|
205
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
206
|
-
end
|
207
|
-
end
|
208
|
-
|
209
|
-
end
|
210
|
-
|
211
|
-
def parse(source, &block)
|
183
|
+
def parse(input, &block)
|
212
184
|
columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
|
213
185
|
|
214
|
-
|
186
|
+
input.each { |line|
|
215
187
|
case line = line.chomp
|
216
188
|
when /\ACOPY\s+(\S+)\s+\((.+?)\)\s+FROM\s+stdin;\z/i
|
217
189
|
columns[table = $1] = $2.split(/\s*,\s*/)
|
@@ -242,9 +214,16 @@ module Athena::Formats
|
|
242
214
|
num
|
243
215
|
end
|
244
216
|
|
217
|
+
private
|
218
|
+
|
219
|
+
def init_in(*)
|
220
|
+
@__record_element_ok__ = [String, nil]
|
221
|
+
super
|
222
|
+
end
|
223
|
+
|
245
224
|
end
|
246
225
|
|
247
|
-
MySQL
|
248
|
-
PgSQL
|
226
|
+
class MySQL < MYSQL; private :parse; end
|
227
|
+
class PgSQL < PGSQL; private :parse; end
|
249
228
|
|
250
229
|
end
|
data/lib/athena/formats/xml.rb
CHANGED
@@ -42,22 +42,13 @@ module Athena::Formats
|
|
42
42
|
|
43
43
|
VALUE_SEPARATOR = '|'
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
attr_reader :specs, :record_element
|
48
|
-
|
49
|
-
def initialize(parser)
|
50
|
-
@specs = setup_specs(parser.config.dup)
|
51
|
-
end
|
45
|
+
attr_reader :specs
|
52
46
|
|
47
|
+
def parse(input, &block)
|
48
|
+
REXML::Document.parse_stream(input, listener(&block))
|
49
|
+
Athena::Record.records.size
|
53
50
|
end
|
54
51
|
|
55
|
-
def parse(source, &block)
|
56
|
-
REXML::Document.parse_stream(source, listener(&block))
|
57
|
-
end
|
58
|
-
|
59
|
-
register_format :out
|
60
|
-
|
61
52
|
def convert(record)
|
62
53
|
builder.row {
|
63
54
|
builder.id record.id
|
@@ -79,7 +70,7 @@ module Athena::Formats
|
|
79
70
|
}
|
80
71
|
end
|
81
72
|
|
82
|
-
|
73
|
+
class Flat < XML
|
83
74
|
|
84
75
|
def convert(record)
|
85
76
|
super { |field, struct|
|
@@ -99,18 +90,43 @@ module Athena::Formats
|
|
99
90
|
|
100
91
|
end
|
101
92
|
|
102
|
-
def wrap(out = nil)
|
103
|
-
res = nil
|
104
|
-
builder(:target => out).database { res = super() }
|
105
|
-
res
|
106
|
-
end
|
107
|
-
|
108
93
|
def raw?
|
109
94
|
true
|
110
95
|
end
|
111
96
|
|
112
97
|
private
|
113
98
|
|
99
|
+
def init_in(*)
|
100
|
+
@__record_element_ok__ = [String, Array]
|
101
|
+
super
|
102
|
+
|
103
|
+
case @skip_hierarchy = @config.delete(:__skip_hierarchy)
|
104
|
+
when Integer
|
105
|
+
# fine!
|
106
|
+
when nil
|
107
|
+
@skip_hierarchy = 0
|
108
|
+
else
|
109
|
+
raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
|
110
|
+
end
|
111
|
+
|
112
|
+
@specs = {}
|
113
|
+
|
114
|
+
@config.each { |element, element_spec| element_spec.each { |field, c|
|
115
|
+
element.split('/').reverse.inject({}) { |hash, part|
|
116
|
+
s = define_spec(element, field, c, hash.empty? ? :default : hash)
|
117
|
+
merge_specs(hash, part, s)
|
118
|
+
}.each { |key, s|
|
119
|
+
merge_specs(@specs, key, s)
|
120
|
+
}
|
121
|
+
} }
|
122
|
+
end
|
123
|
+
|
124
|
+
def wrap
|
125
|
+
res = nil
|
126
|
+
builder(:target => output).database { res = super() }
|
127
|
+
res
|
128
|
+
end
|
129
|
+
|
114
130
|
def builder(options = {})
|
115
131
|
@builder ||= begin
|
116
132
|
builder = Builder::XmlMarkup.new({ :indent => 2 }.merge(options))
|
@@ -129,39 +145,6 @@ module Athena::Formats
|
|
129
145
|
end
|
130
146
|
end
|
131
147
|
|
132
|
-
def setup_specs(config)
|
133
|
-
case @record_element = config.delete(:__record_element)
|
134
|
-
when String, Array
|
135
|
-
# fine!
|
136
|
-
when nil
|
137
|
-
raise NoRecordElementError, 'no record element specified'
|
138
|
-
else
|
139
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
140
|
-
end
|
141
|
-
|
142
|
-
case @skip_hierarchy = config.delete(:__skip_hierarchy)
|
143
|
-
when Integer
|
144
|
-
# fine!
|
145
|
-
when nil
|
146
|
-
@skip_hierarchy = 0
|
147
|
-
else
|
148
|
-
raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
|
149
|
-
end
|
150
|
-
|
151
|
-
config.inject({}) { |specs, (element, element_spec)|
|
152
|
-
element_spec.each { |field, c|
|
153
|
-
element.split('/').reverse.inject({}) { |hash, part|
|
154
|
-
s = define_spec(element, field, c, hash.empty? ? :default : hash)
|
155
|
-
merge_specs(hash, part, s)
|
156
|
-
}.each { |key, s|
|
157
|
-
merge_specs(specs, key, s)
|
158
|
-
}
|
159
|
-
}
|
160
|
-
|
161
|
-
specs
|
162
|
-
}
|
163
|
-
end
|
164
|
-
|
165
148
|
def listener(&block)
|
166
149
|
record_spec = RecordSpec.new(&block)
|
167
150
|
record_spec.specs!(specs)
|