athena 0.2.1 → 0.2.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/README +12 -2
- data/Rakefile +1 -1
- data/bin/athena +1 -1
- data/example/athena_plugin.rb +13 -0
- data/lib/athena.rb +54 -26
- data/lib/athena/cli.rb +18 -41
- data/lib/athena/formats.rb +363 -53
- data/lib/athena/formats/dbm.rb +2 -2
- data/lib/athena/formats/ferret.rb +4 -21
- data/lib/athena/formats/lingo.rb +58 -54
- data/lib/athena/formats/sisis.rb +2 -21
- data/lib/athena/formats/sql.rb +24 -45
- data/lib/athena/formats/xml.rb +36 -53
- data/lib/athena/record.rb +1 -3
- data/lib/athena/version.rb +1 -1
- metadata +24 -24
- data/lib/athena/parser.rb +0 -90
data/lib/athena/formats/dbm.rb
CHANGED
@@ -38,8 +38,6 @@ module Athena::Formats
|
|
38
38
|
|
39
39
|
ICONV_TO_LATIN1 = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
|
40
40
|
|
41
|
-
register_format :out, 'midos'
|
42
|
-
|
43
41
|
def convert(record)
|
44
42
|
dbm = ["ID:#{record.id}"]
|
45
43
|
|
@@ -70,4 +68,6 @@ module Athena::Formats
|
|
70
68
|
|
71
69
|
end
|
72
70
|
|
71
|
+
Midos = DBM
|
72
|
+
|
73
73
|
end
|
data/lib/athena/formats/ferret.rb
CHANGED
@@ -43,27 +43,8 @@ module Athena::Formats
|
|
43
43
|
|
44
44
|
class Ferret < Base
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
attr_reader :record_element, :config, :match_all_query
|
49
|
-
|
50
|
-
def initialize(parser)
|
51
|
-
@config = parser.config.dup
|
52
|
-
|
53
|
-
case @record_element = @config.delete(:__record_element)
|
54
|
-
when String
|
55
|
-
# fine!
|
56
|
-
when nil
|
57
|
-
raise NoRecordElementError, 'no record element specified'
|
58
|
-
else
|
59
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
end if Object.const_defined?(:Ferret)
|
64
|
-
|
65
|
-
def parse(source, &block)
|
66
|
-
path = source.path
|
46
|
+
def parse(input, &block)
|
47
|
+
path = input.path
|
67
48
|
|
68
49
|
# make sure the index can be opened
|
69
50
|
begin
|
@@ -100,6 +81,8 @@ module Athena::Formats
|
|
100
81
|
index.num_docs
|
101
82
|
end
|
102
83
|
|
84
|
+
private :parse unless Object.const_defined?(:Ferret)
|
85
|
+
|
103
86
|
end
|
104
87
|
|
105
88
|
end
|
data/lib/athena/formats/lingo.rb
CHANGED
@@ -34,51 +34,8 @@ module Athena::Formats
|
|
34
34
|
|
35
35
|
class Lingo < Base
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
MV_SEPARATOR = ';'
|
40
|
-
|
41
|
-
def convert(record)
|
42
|
-
terms = []
|
43
|
-
|
44
|
-
record.struct.each { |field, struct|
|
45
|
-
struct_values = struct[:values]
|
46
|
-
struct_values.default = []
|
47
|
-
|
48
|
-
values = []
|
49
|
-
|
50
|
-
struct[:elements].each { |element|
|
51
|
-
struct_values[element].each { |value|
|
52
|
-
if value
|
53
|
-
value = value.strip.gsub(CRLF_RE, ' ')
|
54
|
-
values << value unless value.empty?
|
55
|
-
end
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
|
-
terms << values
|
60
|
-
}
|
61
|
-
|
62
|
-
terms
|
63
|
-
end
|
64
|
-
|
65
|
-
def deferred?
|
66
|
-
true
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def check_args(expected, actual, &block)
|
72
|
-
if block ? block[actual] : expected == actual
|
73
|
-
true
|
74
|
-
else
|
75
|
-
warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
|
76
|
-
false
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# "Nasenbär\n"
|
81
|
-
register_format! :out, 'lingo/single_word' do
|
37
|
+
# "Nasenbär"
|
38
|
+
class SingleWord < Lingo
|
82
39
|
|
83
40
|
def convert(record)
|
84
41
|
super.flatten
|
@@ -86,25 +43,29 @@ module Athena::Formats
|
|
86
43
|
|
87
44
|
end
|
88
45
|
|
89
|
-
# "John Vorhauer*Vorhauer, John
|
90
|
-
|
46
|
+
# "John Vorhauer*Vorhauer, John"
|
47
|
+
class KeyValue < Lingo
|
48
|
+
|
49
|
+
SEPARATOR = '*'
|
91
50
|
|
92
51
|
def convert(record)
|
93
52
|
super.map { |terms|
|
94
|
-
terms.join(
|
53
|
+
terms.join(SEPARATOR) if check_args(2, terms.size)
|
95
54
|
}.compact
|
96
55
|
end
|
97
56
|
|
98
57
|
end
|
99
58
|
|
100
|
-
# "Essen,essen #v Essen #s Esse #s
|
101
|
-
|
59
|
+
# "Essen,essen #v Essen #s Esse #s"
|
60
|
+
class WordClass < Lingo
|
61
|
+
|
62
|
+
SEPARATOR = ','
|
102
63
|
|
103
64
|
def convert(record)
|
104
65
|
super.map { |terms|
|
105
66
|
[ terms.shift,
|
106
67
|
terms.to_enum(:each_slice, 2).map { |w, c| "#{w} ##{c}" }.join(' ')
|
107
|
-
].join(
|
68
|
+
].join(SEPARATOR) if check_args('odd, > 1', terms.size) { |actual|
|
108
69
|
actual > 1 && actual % 2 == 1
|
109
70
|
}
|
110
71
|
}.compact
|
@@ -112,12 +73,14 @@ module Athena::Formats
|
|
112
73
|
|
113
74
|
end
|
114
75
|
|
115
|
-
# "Fax;Faxkopie;Telefax
|
116
|
-
|
76
|
+
# "Fax;Faxkopie;Telefax"
|
77
|
+
class MultiValue < Lingo
|
78
|
+
|
79
|
+
SEPARATOR = ';'
|
117
80
|
|
118
81
|
def convert(record)
|
119
82
|
super.map { |terms|
|
120
|
-
terms.join(
|
83
|
+
terms.join(SEPARATOR) if check_args('> 1', terms.size) { |actual|
|
121
84
|
actual > 1
|
122
85
|
}
|
123
86
|
}.compact
|
@@ -125,6 +88,47 @@ module Athena::Formats
|
|
125
88
|
|
126
89
|
end
|
127
90
|
|
91
|
+
MultiKey = MultiValue
|
92
|
+
|
93
|
+
def deferred?
|
94
|
+
true
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
def convert(record)
|
100
|
+
terms = []
|
101
|
+
|
102
|
+
record.struct.each { |field, struct|
|
103
|
+
struct_values = struct[:values]
|
104
|
+
struct_values.default = []
|
105
|
+
|
106
|
+
values = []
|
107
|
+
|
108
|
+
struct[:elements].each { |element|
|
109
|
+
struct_values[element].each { |value|
|
110
|
+
if value
|
111
|
+
value = value.strip.gsub(CRLF_RE, ' ')
|
112
|
+
values << value unless value.empty?
|
113
|
+
end
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
terms << values
|
118
|
+
}
|
119
|
+
|
120
|
+
terms
|
121
|
+
end
|
122
|
+
|
123
|
+
def check_args(expected, actual, &block)
|
124
|
+
if block ? block[actual] : expected == actual
|
125
|
+
true
|
126
|
+
else
|
127
|
+
warn "wrong number of arguments for #{self} (#{actual} for #{expected})"
|
128
|
+
false
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
128
132
|
end
|
129
133
|
|
130
134
|
end
|
data/lib/athena/formats/sisis.rb
CHANGED
@@ -34,29 +34,10 @@ module Athena::Formats
|
|
34
34
|
|
35
35
|
RECORD_RE = %r{(\d+).*?:\s*(.*)}
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
attr_reader :record_element, :config
|
40
|
-
|
41
|
-
def initialize(parser)
|
42
|
-
@config = parser.config.dup
|
43
|
-
|
44
|
-
case @record_element = @config.delete(:__record_element)
|
45
|
-
when String
|
46
|
-
# fine!
|
47
|
-
when nil
|
48
|
-
raise NoRecordElementError, 'no record element specified'
|
49
|
-
else
|
50
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
def parse(source, &block)
|
37
|
+
def parse(input, &block)
|
57
38
|
record, num = nil, 0
|
58
39
|
|
59
|
-
|
40
|
+
input.each { |line|
|
60
41
|
element, value = line.match(RECORD_RE)[1, 2]
|
61
42
|
|
62
43
|
if element == record_element
|
data/lib/athena/formats/sql.rb
CHANGED
@@ -33,29 +33,12 @@ module Athena::Formats
|
|
33
33
|
|
34
34
|
class MYSQL < Base
|
35
35
|
|
36
|
-
|
36
|
+
attr_reader :sql_parser
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
def initialize(parser)
|
41
|
-
@config = parser.config.dup
|
42
|
-
|
43
|
-
case @record_element = @config.delete(:__record_element)
|
44
|
-
when String, nil
|
45
|
-
# fine!
|
46
|
-
else
|
47
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
48
|
-
end
|
49
|
-
|
50
|
-
@sql_parser = SQLParser.new
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse(source, &block)
|
38
|
+
def parse(input, &block)
|
56
39
|
columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
|
57
40
|
|
58
|
-
|
41
|
+
input.each { |line|
|
59
42
|
case line = line.chomp
|
60
43
|
when /\ACREATE\s+TABLE\s+`(.+?)`/i
|
61
44
|
table = $1
|
@@ -88,6 +71,14 @@ module Athena::Formats
|
|
88
71
|
num
|
89
72
|
end
|
90
73
|
|
74
|
+
private
|
75
|
+
|
76
|
+
def init_in(*)
|
77
|
+
@__record_element_ok__ = [String, nil]
|
78
|
+
super
|
79
|
+
@sql_parser = SQLParser.new
|
80
|
+
end
|
81
|
+
|
91
82
|
class SQLParser
|
92
83
|
|
93
84
|
AST = Struct.new(:value)
|
@@ -104,7 +95,7 @@ module Athena::Formats
|
|
104
95
|
while result = parse_row
|
105
96
|
row = result.value
|
106
97
|
block_given ? yield(row) : rows << row
|
107
|
-
break unless @input.scan(
|
98
|
+
break unless @input.scan(/,\s*/)
|
108
99
|
end
|
109
100
|
|
110
101
|
@input.scan(/;/) # optional
|
@@ -114,8 +105,6 @@ module Athena::Formats
|
|
114
105
|
rows unless block_given
|
115
106
|
end
|
116
107
|
|
117
|
-
private
|
118
|
-
|
119
108
|
def parse_row
|
120
109
|
return unless @input.scan(/\(/)
|
121
110
|
|
@@ -123,7 +112,7 @@ module Athena::Formats
|
|
123
112
|
|
124
113
|
while result = parse_value
|
125
114
|
row << result.value
|
126
|
-
break unless @input.scan(
|
115
|
+
break unless @input.scan(/,\s*/)
|
127
116
|
end
|
128
117
|
|
129
118
|
error('Unclosed row') unless @input.scan(/\)/)
|
@@ -191,27 +180,10 @@ module Athena::Formats
|
|
191
180
|
|
192
181
|
class PGSQL < Base
|
193
182
|
|
194
|
-
|
195
|
-
|
196
|
-
attr_reader :record_element, :config
|
197
|
-
|
198
|
-
def initialize(parser)
|
199
|
-
@config = parser.config.dup
|
200
|
-
|
201
|
-
case @record_element = @config.delete(:__record_element)
|
202
|
-
when String, nil
|
203
|
-
# fine!
|
204
|
-
else
|
205
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
206
|
-
end
|
207
|
-
end
|
208
|
-
|
209
|
-
end
|
210
|
-
|
211
|
-
def parse(source, &block)
|
183
|
+
def parse(input, &block)
|
212
184
|
columns, table, num = Hash.new { |h, k| h[k] = [] }, nil, 0
|
213
185
|
|
214
|
-
|
186
|
+
input.each { |line|
|
215
187
|
case line = line.chomp
|
216
188
|
when /\ACOPY\s+(\S+)\s+\((.+?)\)\s+FROM\s+stdin;\z/i
|
217
189
|
columns[table = $1] = $2.split(/\s*,\s*/)
|
@@ -242,9 +214,16 @@ module Athena::Formats
|
|
242
214
|
num
|
243
215
|
end
|
244
216
|
|
217
|
+
private
|
218
|
+
|
219
|
+
def init_in(*)
|
220
|
+
@__record_element_ok__ = [String, nil]
|
221
|
+
super
|
222
|
+
end
|
223
|
+
|
245
224
|
end
|
246
225
|
|
247
|
-
MySQL
|
248
|
-
PgSQL
|
226
|
+
class MySQL < MYSQL; private :parse; end
|
227
|
+
class PgSQL < PGSQL; private :parse; end
|
249
228
|
|
250
229
|
end
|
data/lib/athena/formats/xml.rb
CHANGED
@@ -42,22 +42,13 @@ module Athena::Formats
|
|
42
42
|
|
43
43
|
VALUE_SEPARATOR = '|'
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
attr_reader :specs, :record_element
|
48
|
-
|
49
|
-
def initialize(parser)
|
50
|
-
@specs = setup_specs(parser.config.dup)
|
51
|
-
end
|
45
|
+
attr_reader :specs
|
52
46
|
|
47
|
+
def parse(input, &block)
|
48
|
+
REXML::Document.parse_stream(input, listener(&block))
|
49
|
+
Athena::Record.records.size
|
53
50
|
end
|
54
51
|
|
55
|
-
def parse(source, &block)
|
56
|
-
REXML::Document.parse_stream(source, listener(&block))
|
57
|
-
end
|
58
|
-
|
59
|
-
register_format :out
|
60
|
-
|
61
52
|
def convert(record)
|
62
53
|
builder.row {
|
63
54
|
builder.id record.id
|
@@ -79,7 +70,7 @@ module Athena::Formats
|
|
79
70
|
}
|
80
71
|
end
|
81
72
|
|
82
|
-
|
73
|
+
class Flat < XML
|
83
74
|
|
84
75
|
def convert(record)
|
85
76
|
super { |field, struct|
|
@@ -99,18 +90,43 @@ module Athena::Formats
|
|
99
90
|
|
100
91
|
end
|
101
92
|
|
102
|
-
def wrap(out = nil)
|
103
|
-
res = nil
|
104
|
-
builder(:target => out).database { res = super() }
|
105
|
-
res
|
106
|
-
end
|
107
|
-
|
108
93
|
def raw?
|
109
94
|
true
|
110
95
|
end
|
111
96
|
|
112
97
|
private
|
113
98
|
|
99
|
+
def init_in(*)
|
100
|
+
@__record_element_ok__ = [String, Array]
|
101
|
+
super
|
102
|
+
|
103
|
+
case @skip_hierarchy = @config.delete(:__skip_hierarchy)
|
104
|
+
when Integer
|
105
|
+
# fine!
|
106
|
+
when nil
|
107
|
+
@skip_hierarchy = 0
|
108
|
+
else
|
109
|
+
raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
|
110
|
+
end
|
111
|
+
|
112
|
+
@specs = {}
|
113
|
+
|
114
|
+
@config.each { |element, element_spec| element_spec.each { |field, c|
|
115
|
+
element.split('/').reverse.inject({}) { |hash, part|
|
116
|
+
s = define_spec(element, field, c, hash.empty? ? :default : hash)
|
117
|
+
merge_specs(hash, part, s)
|
118
|
+
}.each { |key, s|
|
119
|
+
merge_specs(@specs, key, s)
|
120
|
+
}
|
121
|
+
} }
|
122
|
+
end
|
123
|
+
|
124
|
+
def wrap
|
125
|
+
res = nil
|
126
|
+
builder(:target => output).database { res = super() }
|
127
|
+
res
|
128
|
+
end
|
129
|
+
|
114
130
|
def builder(options = {})
|
115
131
|
@builder ||= begin
|
116
132
|
builder = Builder::XmlMarkup.new({ :indent => 2 }.merge(options))
|
@@ -129,39 +145,6 @@ module Athena::Formats
|
|
129
145
|
end
|
130
146
|
end
|
131
147
|
|
132
|
-
def setup_specs(config)
|
133
|
-
case @record_element = config.delete(:__record_element)
|
134
|
-
when String, Array
|
135
|
-
# fine!
|
136
|
-
when nil
|
137
|
-
raise NoRecordElementError, 'no record element specified'
|
138
|
-
else
|
139
|
-
raise IllegalRecordElementError, "illegal record element #{@record_element.inspect}"
|
140
|
-
end
|
141
|
-
|
142
|
-
case @skip_hierarchy = config.delete(:__skip_hierarchy)
|
143
|
-
when Integer
|
144
|
-
# fine!
|
145
|
-
when nil
|
146
|
-
@skip_hierarchy = 0
|
147
|
-
else
|
148
|
-
raise ConfigError, "illegal value #{@skip_hierarchy.inspect} for skip hierarchy"
|
149
|
-
end
|
150
|
-
|
151
|
-
config.inject({}) { |specs, (element, element_spec)|
|
152
|
-
element_spec.each { |field, c|
|
153
|
-
element.split('/').reverse.inject({}) { |hash, part|
|
154
|
-
s = define_spec(element, field, c, hash.empty? ? :default : hash)
|
155
|
-
merge_specs(hash, part, s)
|
156
|
-
}.each { |key, s|
|
157
|
-
merge_specs(specs, key, s)
|
158
|
-
}
|
159
|
-
}
|
160
|
-
|
161
|
-
specs
|
162
|
-
}
|
163
|
-
end
|
164
|
-
|
165
148
|
def listener(&block)
|
166
149
|
record_spec = RecordSpec.new(&block)
|
167
150
|
record_spec.specs!(specs)
|