fluent-plugin-redshift 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +33 -10
- data/VERSION +1 -1
- data/lib/fluent/plugin/out_redshift.rb +75 -31
- data/test/plugin/test_out_redshift.rb +139 -4
- metadata +15 -29
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 0c2ea0e593b59bd6c6f6dae6f6b95fe496ec059e
+  data.tar.gz: 3f7e925db5e64d32eaa446f0e969f32ce7137715
+SHA512:
+  metadata.gz: 20247b545d6a63ecb4ba6b773338ae77416c01c4e567bcdedeb69b0668da7560b24ab91a844c702aa37c2aa74477038d4c00818dd17cba6804ce91155f642ef8
+  data.tar.gz: 1a556e377f72c38430289154f98bf6f81da5737e7a09821153a9846fb5c67ad0878a5cce510513874d90b81dbfdee9370d2e3076be3ddc1f4dc53faf57633be1
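The added checksums.yaml is the integrity record RubyGems 2.x embeds in every gem: SHA1 and SHA512 digests of the two archives inside the .gem file. A minimal sketch of checking those digests by hand, assuming a locally downloaded gem (the path is illustrative; a .gem file is a tar archive containing metadata.gz, data.tar.gz, and checksums.yaml.gz):

```ruby
require 'digest'
require 'yaml'
require 'zlib'
require 'rubygems/package'

# Illustrative local path.
gem_path = 'fluent-plugin-redshift-0.0.3.gem'

# Read every entry of the outer tar archive into memory.
contents = {}
File.open(gem_path, 'rb') do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    contents[entry.full_name] = entry.read
  end
end

# Compare the recorded SHA512 digests against the embedded archives.
checksums = YAML.load(Zlib.gunzip(contents['checksums.yaml.gz']))
checksums['SHA512'].each do |name, expected|
  actual = Digest::SHA512.hexdigest(contents[name])
  puts "#{name}: #{actual == expected ? 'OK' : 'MISMATCH'}"
end
```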
data/README.md
CHANGED
@@ -15,7 +15,7 @@ Format:

     <match my.tag>
       type redshift
-
+
       # s3 (for copying data to redshift)
       aws_key_id YOUR_AWS_KEY_ID
       aws_sec_key YOUR_AWS_SECRET_KEY
@@ -23,16 +23,17 @@ Format:
       s3_endpoint YOUR_S3_BUCKET_END_POINT
       path YOUR_S3_PATH
       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
-
+
       # redshift
       redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
       redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
       redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
       redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
       redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
+      redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
       redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
-      file_type [tsv|csv|json]
-
+      file_type [tsv|csv|json|msgpack]
+
       # buffer
       buffer_type file
       buffer_path /var/log/fluent/redshift
@@ -49,18 +50,18 @@ Example (watch and upload json formatted apache log):
       tag redshift.json
       format /^(?<log>.*)$/
     </source>
-
+
     <match redshift.json>
       type redshift
-
+
       # s3 (for copying data to redshift)
       aws_key_id YOUR_AWS_KEY_ID
       aws_sec_key YOUR_AWS_SECRET_KEY
       s3_bucket hapyrus-example
       s3_endpoint s3.amazonaws.com
-      path apache_json_log
+      path path/on/s3/apache_json_log/
       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
-
+
       # redshift
       redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
       redshift_port 5439
@@ -69,7 +70,7 @@ Example (watch and upload json formatted apache log):
       redshift_password fluent-password
       redshift_tablename apache_log
       file_type json
-
+
       # buffer
       buffer_type file
       buffer_path /var/log/fluent/redshift
@@ -110,7 +111,9 @@ Example (watch and upload json formatted apache log):

 + `redshift_tablename` (required) : table name to store data.

-+ `file_type` : file format of the source data. `csv`, `tsv` or `json` are available.
++ `redshift_schemaname` : schema name to store data. By default, this option is not set and find table without schema as your own search_path.
+
++ `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.

 + `delimiter` : delimiter of the source data. This option will be ignored if `file_type` is specified.

@@ -124,6 +127,26 @@ Example (watch and upload json formatted apache log):

 + `utc` : utc time zone. This parameter affects `timestamp_key_format`.

+## Logging examples
+```ruby
+# examples by fluent-logger
+require 'fluent-logger'
+log = Fluent::Logger::FluentLogger.new(nil, :host => 'localhost', :port => 24224)
+
+# file_type: csv
+log.post('your.tag', :log => "12345,12345")
+
+# file_type: tsv
+log.post('your.tag', :log => "12345\t12345")
+
+# file_type: json
+require 'json'
+log.post('your.tag', :log => { :user_id => 12345, :data_id => 12345 }.to_json)
+
+# file_type: msgpack
+log.post('your.tag', :user_id => 12345, :data_id => 12345)
+```
+
 ## License

 Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)
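Two behavioral notes fall out of the README changes above: `file_type msgpack` posts the record itself (no pre-formatted `log` field), and `redshift_schemaname` schema-qualifies the COPY target, whereas an unset schema leaves resolution to the connection's search_path. A minimal sketch of the name composition, with illustrative values:

```ruby
# Illustrative stand-ins for the plugin's config_param values.
redshift_schemaname = 'analytics'   # nil when the option is unset
redshift_tablename  = 'apache_log'

# With a schema configured, COPY targets "schema.table".
table = if redshift_schemaname
          "#{redshift_schemaname}.#{redshift_tablename}"
        else
          redshift_tablename
        end

puts "copy #{table} from 's3://...' ..."
# => copy analytics.apache_log from 's3://...' ...
```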
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.2
+0.0.3
data/lib/fluent/plugin/out_redshift.rb
CHANGED
@@ -34,16 +34,19 @@ class RedshiftOutput < BufferedOutput
   config_param :redshift_user, :string
   config_param :redshift_password, :string
   config_param :redshift_tablename, :string
+  config_param :redshift_schemaname, :string, :default => nil
+  config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
+  config_param :redshift_copy_options, :string , :default => nil
   # file format
-  config_param :file_type, :string, :default => nil # json, tsv, csv
+  config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
   config_param :delimiter, :string, :default => nil
   # for debug
   config_param :log_suffix, :string, :default => ''

   def configure(conf)
     super
-    @path = "#{@path}/"
-    @path =
+    @path = "#{@path}/" unless @path.end_with?('/') # append last slash
+    @path = @path[1..-1] if @path.start_with?('/') # remove head slash
     @utc = true if conf['utc']
     @db_conf = {
       host:@redshift_host,
@@ -54,7 +57,7 @@ class RedshiftOutput < BufferedOutput
     }
     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
     $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
-    @copy_sql_template = "copy #{
+    @copy_sql_template = "copy #{table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
   end

   def start
@@ -70,7 +73,13 @@ class RedshiftOutput < BufferedOutput
   end

   def format(tag, time, record)
-
+    if json?
+      record.to_msgpack
+    elsif msgpack?
+      { @record_log_tag => record }.to_msgpack
+    else
+      "#{record[@record_log_tag]}\n"
+    end
   end

   def write(chunk)
@@ -78,8 +87,12 @@ class RedshiftOutput < BufferedOutput

     # create a gz file
     tmp = Tempfile.new("s3-")
-    tmp =
-
+    tmp =
+      if json? || msgpack?
+        create_gz_file_from_structured_data(tmp, chunk, @delimiter)
+      else
+        create_gz_file_from_flat_data(tmp, chunk)
+      end

     # no data -> skip
     unless tmp
@@ -93,6 +106,10 @@ class RedshiftOutput < BufferedOutput
     # upload gz to s3
     @bucket.objects[s3path].write(Pathname.new(tmp.path),
                                   :acl => :bucket_owner_full_control)
+
+    # close temp file
+    tmp.close!
+
     # copy gz on s3 to redshift
     s3_uri = "s3://#{@s3_bucket}/#{s3path}"
     sql = @copy_sql_template % [s3_uri, @aws_sec_key]
@@ -122,7 +139,11 @@ class RedshiftOutput < BufferedOutput
     @file_type == 'json'
   end

-  def
+  def msgpack?
+    @file_type == 'msgpack'
+  end
+
+  def create_gz_file_from_flat_data(dst_file, chunk)
     gzw = nil
     begin
       gzw = Zlib::GzipWriter.new(dst_file)
@@ -133,13 +154,13 @@ class RedshiftOutput < BufferedOutput
     dst_file
   end

-  def
+  def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
     # fetch the table definition from redshift
     redshift_table_columns = fetch_table_columns
     if redshift_table_columns == nil
       raise "failed to fetch the redshift table definition."
     elsif redshift_table_columns.empty?
-      $log.warn format_log("no table on redshift. table_name=#{@redshift_tablename}")
+      $log.warn format_log("no table on redshift. table_name=#{table_name_with_schema}")
       return nil
     end

@@ -149,10 +170,16 @@ class RedshiftOutput < BufferedOutput
     gzw = Zlib::GzipWriter.new(dst_file)
     chunk.msgpack_each do |record|
       begin
-
+        hash = json? ? json_to_hash(record[@record_log_tag]) : record[@record_log_tag]
+        tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
         gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
       rescue => e
-
+        if json?
+          $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>$!.to_s
+        else
+          $log.error format_log("failed to create table text from msgpack. text=(#{record[@record_log_tag]})"), :error=>$!.to_s
+        end
+
         $log.error_backtrace
       end
     end
@@ -165,7 +192,7 @@ class RedshiftOutput < BufferedOutput

   def determine_delimiter(file_type)
     case file_type
-    when 'json', 'tsv'
+    when 'json', 'msgpack', 'tsv'
       "\t"
     when "csv"
       ','
@@ -175,11 +202,10 @@ class RedshiftOutput < BufferedOutput
   end

   def fetch_table_columns
-    fetch_columns_sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
     conn = PG.connect(@db_conf)
     begin
       columns = nil
-      conn.exec(fetch_columns_sql) do |result|
+      conn.exec(fetch_columns_sql_with_schema) do |result|
         columns = result.collect{|row| row['column_name']}
       end
       columns
@@ -188,28 +214,39 @@ class RedshiftOutput < BufferedOutput
     end
   end

-  def
-
+  def fetch_columns_sql_with_schema
+    @fetch_columns_sql ||= if @redshift_schemaname
+                             "select column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@redshift_schemaname}' and table_name = '#{@redshift_tablename}' order by ordinal_position;"
+                           else
+                             "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
+                           end
+  end

-
-
-
-
-
-
-
-
-
+  def json_to_hash(json_text)
+    return nil if json_text.to_s.empty?
+
+    JSON.parse(json_text)
+  rescue => e
+    $log.warn format_log("failed to parse json. "), :error => e.to_s
+  end
+
+  def hash_to_table_text(redshift_table_columns, hash, delimiter)
+    return "" unless hash

-    # extract values from
+    # extract values from hash
     val_list = redshift_table_columns.collect do |cn|
-      val =
-      val = nil unless val and not val.to_s.empty?
+      val = hash[cn]
       val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
-
+
+      if val.to_s.empty?
+        nil
+      else
+        val.to_s
+      end
     end
+
     if val_list.all?{|v| v.nil? or v.empty?}
-      $log.warn format_log("no data match for table columns on redshift.
+      $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
       return ""
     end

@@ -238,6 +275,13 @@ class RedshiftOutput < BufferedOutput
     s3path
   end

+  def table_name_with_schema
+    @table_name_with_schema ||= if @redshift_schemaname
+                                  "#{@redshift_schemaname}.#{@redshift_tablename}"
+                                else
+                                  @redshift_tablename
+                                end
+  end
 end

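For the structured file types (json and msgpack), each buffered record becomes one delimiter-separated row whose cells follow the order of the table's columns fetched from INFORMATION_SCHEMA.COLUMNS, with hashes and arrays serialized as JSON text. A simplified standalone mirror of that mapping (the column list is a stand-in; the real hash_to_table_text also escapes delimiters and warns when nothing matches):

```ruby
require 'json'

# Stand-in for the column list fetch_table_columns would return.
columns = ['key_a', 'key_b', 'key_c', 'key_d']

# A record as it arrives with file_type msgpack.
record = { 'key_a' => 'val_a', 'key_b' => { 'foo' => 'var' } }

# One cell per table column: empty cells for missing keys,
# JSON text for structured values.
row = columns.collect do |name|
  val = record[name]
  val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
  val.to_s
end.join("\t") + "\n"

p row  # => "val_a\t{\"foo\":\"var\"}\t\t\n"
```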
data/test/plugin/test_out_redshift.rb
CHANGED
@@ -40,6 +40,15 @@ class RedshiftOutputTest < Test::Unit::TestCase
     #{CONFIG_BASE}
     file_type json
   ]
+  CONFIG_JSON_WITH_SCHEMA = %[
+    #{CONFIG_BASE}
+    redshift_schemaname test_schema
+    file_type json
+  ]
+  CONFIG_MSGPACK = %[
+    #{CONFIG_BASE}
+    file_type msgpack
+  ]
   CONFIG_PIPE_DELIMITER= %[
     #{CONFIG_BASE}
     delimiter |
@@ -57,6 +66,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
   RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
   RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
   RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
+  RECORD_MSGPACK_A = {"key_a" => "val_a", "key_b" => "val_b"}
+  RECORD_MSGPACK_B = {"key_c" => "val_c", "key_d" => "val_d"}
   DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i

   def create_driver(conf = CONFIG, tag='test.input')
@@ -89,10 +100,17 @@ class RedshiftOutputTest < Test::Unit::TestCase
     assert_equal "test_user", d.instance.redshift_user
     assert_equal "test_password", d.instance.redshift_password
     assert_equal "test_table", d.instance.redshift_tablename
+    assert_equal nil, d.instance.redshift_schemaname
+    assert_equal "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS", d.instance.redshift_copy_base_options
+    assert_equal nil, d.instance.redshift_copy_options
     assert_equal "csv", d.instance.file_type
     assert_equal ",", d.instance.delimiter
     assert_equal true, d.instance.utc
   end
+  def test_configure_with_schemaname
+    d = create_driver(CONFIG_JSON_WITH_SCHEMA)
+    assert_equal "test_schema", d.instance.redshift_schemaname
+  end
   def test_configure_localtime
     d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
     assert_equal false, d.instance.utc
@@ -109,6 +127,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
     assert_equal "log/", d.instance.path
   end
+  def test_configure_path_starts_with_slash
+    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log/'))
+    assert_equal "log/", d.instance.path
+  end
+  def test_configure_path_starts_with_slash_without_last_slash
+    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log'))
+    assert_equal "log/", d.instance.path
+  end
   def test_configure_tsv
     d1 = create_driver(CONFIG_TSV)
     assert_equal "tsv", d1.instance.file_type
@@ -119,6 +145,11 @@ class RedshiftOutputTest < Test::Unit::TestCase
     assert_equal "json", d2.instance.file_type
     assert_equal "\t", d2.instance.delimiter
   end
+  def test_configure_msgpack
+    d2 = create_driver(CONFIG_MSGPACK)
+    assert_equal "msgpack", d2.instance.file_type
+    assert_equal "\t", d2.instance.delimiter
+  end
   def test_configure_original_file_type
     d3 = create_driver(CONFIG_PIPE_DELIMITER)
     assert_equal nil, d3.instance.file_type
@@ -145,6 +176,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d.emit(RECORD_JSON_A, DEFAULT_TIME)
     d.emit(RECORD_JSON_B, DEFAULT_TIME)
   end
+  def emit_msgpack(d)
+    d.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+    d.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+  end

   def test_format_csv
     d_csv = create_driver_no_write(CONFIG_CSV)
@@ -168,15 +203,53 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d_json.run
   end

+  def test_format_msgpack
+    d_msgpack = create_driver_no_write(CONFIG_MSGPACK)
+    emit_msgpack(d_msgpack)
+    d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_A }.to_msgpack)
+    d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_B }.to_msgpack)
+    d_msgpack.run
+  end
+
   class PGConnectionMock
-    def initialize(
-      @return_keys = return_keys
+    def initialize(options = {})
+      @return_keys = options[:return_keys] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+      @target_schema = options[:schemaname] || nil
+      @target_table = options[:tablename] || 'test_table'
     end
+
+    def expected_column_list_query
+      if @target_schema
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@target_schema}' and table_name = '#{@target_table}'/
+      else
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@target_table}'/
+      end
+    end
+
+    def expected_copy_query
+      if @target_schema
+        /\Acopy #{@target_schema}.#{@target_table} from/
+      else
+        /\Acopy #{@target_table} from/
+      end
+    end
+
     def exec(sql, &block)
-      if block_given?
-
+      if block_given?
+        if sql =~ expected_column_list_query
+          yield @return_keys.collect{|key| {'column_name' => key}}
+        else
+          yield []
+        end
+      else
+        unless sql =~ expected_copy_query
+          error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+          error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+          raise error
+        end
       end
     end
+
     def close
     end
   end
@@ -228,12 +301,17 @@ class RedshiftOutputTest < Test::Unit::TestCase
     end
   end

+  def setup_tempfile_mock_to_be_closed
+    flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
+  end
+
   def setup_mocks(expected_data)
     setup_pg_mock
     setup_s3_mock(expected_data) end

   def test_write_with_csv
     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_tempfile_mock_to_be_closed
     d_csv = create_driver
     emit_csv(d_csv)
     assert_equal true, d_csv.run
@@ -241,6 +319,7 @@ class RedshiftOutputTest < Test::Unit::TestCase

   def test_write_with_json
     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    setup_tempfile_mock_to_be_closed
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
     assert_equal true, d_json.run
@@ -294,6 +373,53 @@ class RedshiftOutputTest < Test::Unit::TestCase
     assert_equal true, d_json.run
   end

+  def test_write_with_msgpack
+    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    emit_msgpack(d_msgpack)
+    assert_equal true, d_msgpack.run
+  end
+
+  def test_write_with_msgpack_hash_value
+    setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}} , DEFAULT_TIME)
+    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+    assert_equal true, d_msgpack.run
+  end
+
+  def test_write_with_msgpack_array_value
+    setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]} , DEFAULT_TIME)
+    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+    assert_equal true, d_msgpack.run
+  end
+
+  def test_write_with_msgpack_including_tab_newline_quote
+    setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"} , DEFAULT_TIME)
+    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+    assert_equal true, d_msgpack.run
+  end
+
+  def test_write_with_msgpack_no_data
+    setup_mocks("")
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    d_msgpack.emit({}, DEFAULT_TIME)
+    d_msgpack.emit({}, DEFAULT_TIME)
+    assert_equal false, d_msgpack.run
+  end
+
+  def test_write_with_msgpack_no_available_data
+    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+    d_msgpack = create_driver(CONFIG_MSGPACK)
+    d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+    d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
+    assert_equal true, d_msgpack.run
+  end
+
   def test_write_redshift_connection_error
     def PG.connect(dbinfo)
       return Class.new do
@@ -388,4 +514,13 @@ class RedshiftOutputTest < Test::Unit::TestCase
     }
   end

+  def test_write_with_json_fetch_column_with_schema
+    def PG.connect(dbinfo)
+      return PGConnectionMock.new(:schemaname => 'test_schema')
+    end
+    setup_s3_mock(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
+    emit_json(d_json)
+    assert_equal true, d_json.run
+  end
 end
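The expectation strings in test_write_with_msgpack_including_tab_newline_quote pin down the escaping required for COPY ... ESCAPE: a backslash is inserted before any tab, newline, or backslash inside a cell. A minimal sketch of a rule consistent with those fixtures (the helper name is hypothetical, not the plugin's API):

```ruby
# Hypothetical helper: escape the characters that would otherwise
# break a tab-delimited row loaded with COPY ... ESCAPE.
def escape_for_copy(text)
  text.gsub(/[\\\t\n]/) { |c| "\\#{c}" }
end

p escape_for_copy("val_a_with_\t_tab_\n_newline")
# => "val_a_with_\\\t_tab_\\\n_newline"
p escape_for_copy("val_b_with_\\_quote")
# => "val_b_with_\\\\_quote"
```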
metadata
CHANGED
@@ -1,20 +1,18 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-redshift
 version: !ruby/object:Gem::Version
-  version: 0.0.2
-  prerelease:
+  version: 0.0.3
 platform: ruby
 authors:
 - Masashi Miyazaki
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2014-01-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -30,23 +27,20 @@ dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: 1.6.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: 1.6.3
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -54,7 +48,6 @@ dependencies:
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -62,49 +55,43 @@ dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: '0'
 - !ruby/object:Gem::Dependency
   name: simplecov
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: 0.5.4
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: 0.5.4
 - !ruby/object:Gem::Dependency
   name: flexmock
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: 1.3.1
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
      - !ruby/object:Gem::Version
        version: 1.3.1
 description: Amazon Redshift output plugin for Fluentd
@@ -125,27 +112,26 @@ files:
 - test/test_helper.rb
 homepage: https://github.com/hapyrus/fluent-plugin-redshift
 licenses: []
+metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - -
+  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - -
+  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.
+rubygems_version: 2.1.7
 signing_key:
-specification_version:
+specification_version: 4
 summary: Amazon Redshift output plugin for Fluentd
 test_files:
 - test/plugin/test_out_redshift.rb