fluent-plugin-redshift-v2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +83 -0
- data/README.md +36 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/fluent-plugin-redshift-v2.gemspec +33 -0
- data/lib/fluent/plugin/out_redshift_v2.rb +428 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 57c709178f9a8c9dabb068f2078085d8e710cd12
|
4
|
+
data.tar.gz: 06eed04b9965523dc3d980f1745792037e19ce20
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 76a83c7b4a84156bc7d0a93a2065ebefec2f9c7f0408792954524d0ec11e2a94c054c93a8afa9015e3131e2d0ef2b6cf9c827012e59d6050b314ac7f85f50e97
|
7
|
+
data.tar.gz: 2c13a1104dd19e7810c50328cb1f113873fb174c621f79991fa179c2d87c671a7de7c21093b5e9c20fecb1b468d21e8c7e165393e977112341caa0a2e5938a6d
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fluent-plugin-redshift-v2 (0.1.0)
|
5
|
+
aws-sdk
|
6
|
+
fluentd
|
7
|
+
pg
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
aws-sdk (2.6.42)
|
13
|
+
aws-sdk-resources (= 2.6.42)
|
14
|
+
aws-sdk-core (2.6.42)
|
15
|
+
aws-sigv4 (~> 1.0)
|
16
|
+
jmespath (~> 1.0)
|
17
|
+
aws-sdk-resources (2.6.42)
|
18
|
+
aws-sdk-core (= 2.6.42)
|
19
|
+
aws-sigv4 (1.0.0)
|
20
|
+
builder (3.2.3)
|
21
|
+
cool.io (1.5.1)
|
22
|
+
diff-lcs (1.3)
|
23
|
+
fakes3 (1.2.0)
|
24
|
+
builder
|
25
|
+
thor
|
26
|
+
fluentd (0.14.21)
|
27
|
+
cool.io (>= 1.4.5, < 2.0.0)
|
28
|
+
http_parser.rb (>= 0.5.1, < 0.7.0)
|
29
|
+
msgpack (>= 0.7.0, < 2.0.0)
|
30
|
+
ruby_dig (~> 0.0.2)
|
31
|
+
serverengine (>= 2.0.4, < 3.0.0)
|
32
|
+
sigdump (~> 0.2.2)
|
33
|
+
strptime (~> 0.1.7)
|
34
|
+
tzinfo (~> 1.0)
|
35
|
+
tzinfo-data (~> 1.0)
|
36
|
+
yajl-ruby (~> 1.0)
|
37
|
+
http_parser.rb (0.6.0)
|
38
|
+
jmespath (1.3.1)
|
39
|
+
msgpack (1.1.0)
|
40
|
+
pg (0.21.0)
|
41
|
+
power_assert (1.0.2)
|
42
|
+
rake (10.5.0)
|
43
|
+
rspec (3.7.0)
|
44
|
+
rspec-core (~> 3.7.0)
|
45
|
+
rspec-expectations (~> 3.7.0)
|
46
|
+
rspec-mocks (~> 3.7.0)
|
47
|
+
rspec-core (3.7.0)
|
48
|
+
rspec-support (~> 3.7.0)
|
49
|
+
rspec-expectations (3.7.0)
|
50
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
51
|
+
rspec-support (~> 3.7.0)
|
52
|
+
rspec-mocks (3.7.0)
|
53
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
54
|
+
rspec-support (~> 3.7.0)
|
55
|
+
rspec-support (3.7.0)
|
56
|
+
ruby_dig (0.0.2)
|
57
|
+
serverengine (2.0.5)
|
58
|
+
sigdump (~> 0.2.2)
|
59
|
+
sigdump (0.2.4)
|
60
|
+
strptime (0.1.9)
|
61
|
+
test-unit (3.2.5)
|
62
|
+
power_assert
|
63
|
+
thor (0.20.0)
|
64
|
+
thread_safe (0.3.6)
|
65
|
+
tzinfo (1.2.3)
|
66
|
+
thread_safe (~> 0.1)
|
67
|
+
tzinfo-data (1.2017.2)
|
68
|
+
tzinfo (>= 1.0.0)
|
69
|
+
yajl-ruby (1.3.0)
|
70
|
+
|
71
|
+
PLATFORMS
|
72
|
+
ruby
|
73
|
+
|
74
|
+
DEPENDENCIES
|
75
|
+
bundler (~> 1.13)
|
76
|
+
fakes3
|
77
|
+
fluent-plugin-redshift-v2!
|
78
|
+
rake (~> 10.0)
|
79
|
+
rspec (~> 3.0)
|
80
|
+
test-unit
|
81
|
+
|
82
|
+
BUNDLED WITH
|
83
|
+
1.13.7
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Fluent::Plugin::Redshift::V2
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. The plugin's Ruby code lives in the file `lib/fluent/plugin/out_redshift_v2.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'fluent-plugin-redshift-v2'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install fluent-plugin-redshift-v2
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `fluent-plugin-redshift-v2.gemspec`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/nysalor/fluent-plugin-redshift-v2.
|
36
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

require "bundler/setup"
# Load Fluentd's output base classes first: the plugin file subclasses
# Fluent::BufferedOutput at load time and has no require of its own.
require "fluent/output"
# The gem ships exactly one library file; "fluent/plugin/redshift/v2" does not
# exist and would raise LoadError.
require "fluent/plugin/out_redshift_v2"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start
|
data/bin/setup
ADDED
data/fluent-plugin-redshift-v2.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "fluent-plugin-redshift-v2"
|
7
|
+
spec.version = "0.1.0"
|
8
|
+
spec.authors = ["Jun Yokoyama"]
|
9
|
+
spec.email = ["jun@larus.org"]
|
10
|
+
|
11
|
+
spec.description = %q{Amazon Redshift output plugin for Fluentd (inspired by fluent-plugin-redshift)}
|
12
|
+
spec.summary = spec.description
|
13
|
+
spec.homepage = "https://github.com/nysalor/fluent-plugin-redshift-v2"
|
14
|
+
|
15
|
+
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
16
|
+
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
17
|
+
|
18
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
19
|
+
f.match(%r{^(test|spec|features)/})
|
20
|
+
end
|
21
|
+
spec.bindir = "exe"
|
22
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
|
+
spec.require_paths = ["lib"]
|
24
|
+
|
25
|
+
spec.add_development_dependency "bundler", "~> 1.13"
|
26
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
27
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
28
|
+
spec.add_development_dependency "test-unit"
|
29
|
+
spec.add_development_dependency "fakes3"
|
30
|
+
spec.add_dependency "fluentd"
|
31
|
+
spec.add_dependency "aws-sdk"
|
32
|
+
spec.add_dependency "pg"
|
33
|
+
end
|
data/lib/fluent/plugin/out_redshift_v2.rb
ADDED
@@ -0,0 +1,428 @@
|
|
1
|
+
class Fluent::Plugin::RedshiftOutputV2 < Fluent::BufferedOutput
|
2
|
+
Fluent::Plugin.register_output('redshift_v2', self)
|
3
|
+
|
4
|
+
attr_reader :last_sql, :last_gz_path
|
5
|
+
|
6
|
+
config_param :record_log_tag, :string, default: 'log'
|
7
|
+
|
8
|
+
# s3
|
9
|
+
config_param :aws_key_id, :string, secret: true, default: nil, desc: "AWS access key id to access s3 bucket."
|
10
|
+
config_param :aws_sec_key, :string, secret: true, default: nil, desc: "AWS secret key id to access s3 bucket."
|
11
|
+
config_param :aws_iam_role, :string, secret: true, default: nil, desc: "AWS IAM Role to access s3 bucket."
|
12
|
+
config_param :s3_region, :string, desc: 'AWS region name.'
|
13
|
+
config_param :s3_bucket, :string, desc: 'bucket name. S3 bucket must be same as the region of your Redshift cluster.'
|
14
|
+
config_param :s3_endpoint, :string, default: nil, desc: "S3 endpoint."
|
15
|
+
config_param :path, :string, default: "", desc: "S3 path to input."
|
16
|
+
config_param :timestamp_key_format, :string, default: 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M', desc: 'The format of the object keys. It can include date-format directives.'
|
17
|
+
config_param :utc, :bool, default: false
|
18
|
+
config_param :s3_server_side_encryption, :string, default: nil, desc: "S3 Server-Side Encryption (Only aes256 is supported)."
|
19
|
+
|
20
|
+
# redshift
|
21
|
+
config_param :redshift_host, :string, desc: "The end point(or hostname) of your Amazon Redshift cluster."
|
22
|
+
config_param :redshift_port, :integer, default: 5439, desc: "Port number."
|
23
|
+
config_param :redshift_dbname, :string, desc: "Database name."
|
24
|
+
config_param :redshift_user, :string, desc: "User name."
|
25
|
+
config_param :redshift_password, :string, secret: true, desc: "Password for the user name."
|
26
|
+
config_param :redshift_tablename, :string, desc: "Table name to store data."
|
27
|
+
config_param :redshift_schemaname, :string, default: nil, desc: 'Schema name to store data. By default, this option is not set and find table without schema as your own search_path.'
|
28
|
+
config_param :redshift_copy_base_options, :string , default: "ESCAPE FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
|
29
|
+
config_param :redshift_copy_options, :string , default: nil
|
30
|
+
config_param :redshift_connect_timeout, :integer, default: 10, desc: "Maximum time to wait for connection to succeed."
|
31
|
+
config_param :redshift_copy_columns, :string, default: nil, desc: 'Columns for copying. Value needs to be comma-separated like id,name,age'
|
32
|
+
|
33
|
+
# file format
|
34
|
+
config_param :file_type, :string, default: nil, desc: "File format of the source data. csv, tsv, msgpack or json are available."
|
35
|
+
config_param :delimiter, :string, default: nil, desc: 'Delimiter of the source data. This option will be ignored if file_type is specified. '
|
36
|
+
|
37
|
+
# for debug
|
38
|
+
config_param :log_suffix, :string, default: ''
|
39
|
+
|
40
|
+
# Builds the plugin instance and loads the libraries the plugin relies on.
# The requires run at instantiation time, in the same order as before.
def initialize
  super

  %w[aws-sdk zlib time tempfile pg json csv].each { |library| require library }
end
|
51
|
+
|
52
|
+
# Validates the configuration and derives the values used at write time.
#
# Sets: the normalized S3 key prefix (@path), the PG connection settings
# (@db_conf), the field delimiter, the schema-qualified table name, the parsed
# copy-column list, the COPY statement template and the SSE option as a symbol.
#
# @param conf the Fluentd configuration element for this plugin
# @raise [Fluent::ConfigError] when neither an access key pair nor an IAM role
#   is configured, or when the delimiter cannot be derived from file_type
def configure(conf)
  super
  unless check_credentials
    # The class is declared as `class Fluent::Plugin::RedshiftOutputV2`, so a
    # bare `ConfigError` does not resolve; it must be fully qualified.
    raise Fluent::ConfigError, "aws_key_id and aws_sec_key is required. or, use aws_iam_role instead."
  end

  # Normalize the key prefix: always a trailing slash, never a leading one.
  @path = "#{@path}/" unless @path.end_with?('/')
  @path = @path[1..-1] if @path.start_with?('/')

  # @utc is already parsed by `config_param :utc, :bool`. It is deliberately
  # not re-derived from conf['utc']: the raw value is a String, so "false"
  # would be truthy and silently switch UTC on.

  @db_conf = {
    host: @redshift_host,
    port: @redshift_port,
    dbname: @redshift_dbname,
    user: @redshift_user,
    password: @redshift_password,
    connect_timeout: @redshift_connect_timeout,
    # Resolved once here; every later connection reuses this address.
    hostaddr: IPSocket.getaddress(@redshift_host)
  }
  @delimiter = determine_delimiter(@file_type) if @delimiter.nil? || @delimiter.empty?
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
  @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
  @redshift_copy_columns =
    if @redshift_copy_columns.to_s.empty?
      nil
    else
      @redshift_copy_columns.split(/[,\s]+/)
    end
  @copy_sql_template = build_redshift_copy_sql_template
  @s3_server_side_encryption = @s3_server_side_encryption.to_sym if @s3_server_side_encryption
end
|
80
|
+
|
81
|
+
# Creates the S3 client and the Redshift connection wrapper used by writes.
#
# The region and addressing style are always passed to the client. Previously
# they were only set when a static key pair was configured, so a setup using
# only aws_iam_role built a client without the required s3_region.
# Static credentials are added only when both key parts are present.
def start
  super

  options = { region: @s3_region, force_path_style: true }
  if @aws_key_id && @aws_sec_key
    options[:access_key_id] = @aws_key_id
    options[:secret_access_key] = @aws_sec_key
  end
  options[:endpoint] = @s3_endpoint if @s3_endpoint
  @s3_client = Aws::S3::Client.new(options)
  @redshift_connection = RedshiftConnection.new(@db_conf)
end
|
97
|
+
|
98
|
+
# Releases plugin resources and then lets the base class run its own shutdown.
# Any connection held open by the Redshift wrapper is closed first.
def shutdown
  @redshift_connection.close if @redshift_connection
  super
end
|
100
|
+
|
101
|
+
# Serializes one event for buffering.
#
# json file type:    the record itself, msgpack-encoded.
# msgpack file type: the record wrapped under @record_log_tag, msgpack-encoded.
# anything else:     the value stored under @record_log_tag plus a newline.
def format(_tag, _time, record)
  return record.to_msgpack if json?
  return { @record_log_tag => record }.to_msgpack if msgpack?

  "#{record[@record_log_tag]}\n"
end
|
110
|
+
|
111
|
+
# Synchronous write path: delegates to insert_logs and returns its result.
def write(chunk)
  insert_logs(chunk)
end
|
114
|
+
|
115
|
+
# Write path that commits explicitly: loads the chunk, then commits the chunk
# by its unique id. Returns whatever commit_write returns.
def try_write(chunk)
  insert_logs(chunk)
  commit_write(chunk.unique_id)
end
|
119
|
+
|
120
|
+
# Uploads the chunk to S3 as a gzip file and COPYs it into Redshift.
#
# @param chunk the buffer chunk to load
# @return [Boolean] the result of exec_copy, or false when the chunk produced
#   no uploadable data
def insert_logs(chunk)
  $log.debug format_log("start creating gz.")
  gz_path = create_gz_file(chunk)
  # create_gz_file returns false when there was nothing to upload. Issuing a
  # COPY then would target the bogus key "false".
  return false unless gz_path

  exec_copy s3_uri(gz_path)
end
|
124
|
+
|
125
|
+
# Writes the chunk into a gzip temp file and uploads it to S3.
#
# @param chunk the buffer chunk to serialize
# @return [String, false] the S3 key of the uploaded object, or false when the
#   chunk yielded no data
def create_gz_file(chunk)
  tempfile = Tempfile.new("s3-")
  gz_file =
    if json? || msgpack?
      create_gz_file_from_structured_data(tempfile, chunk)
    else
      create_gz_file_from_flat_data(tempfile, chunk)
    end

  unless gz_file
    $log.debug format_log("received no valid data. ")
    return false
  end

  key = next_gz_path
  # The gzip helpers close the temp file when they finalize the archive, so
  # the upload reads the finished file through a fresh read-only handle.
  File.open(gz_file.path, 'rb') do |body|
    @s3_client.put_object(
      server_side_encryption: @s3_server_side_encryption,
      bucket: @s3_bucket,
      body: body,
      key: key
    )
  end

  @last_gz_path = key
ensure
  # Always remove the temp file: on success, on the no-data path (where the
  # helper returned nil) and when the upload raises.
  tempfile.close! if tempfile
end
|
150
|
+
|
151
|
+
# Picks the first unused S3 key for the current time slot.
#
# Keys look like "<path><formatted time>_NN.gz". NN starts at 00 and is
# incremented until head_object reports the key as not found.
def next_gz_path
  now = @utc ? Time.now.utc : Time.now
  timestamp_key = now.strftime(@timestamp_key_format)
  index = 0
  loop do
    candidate = "#{@path}#{timestamp_key}_#{index.to_s.rjust(2, '0')}.gz"
    begin
      @s3_client.head_object(key: candidate, bucket: @s3_bucket)
    rescue Aws::S3::Errors::NotFound
      return candidate
    end
    index += 1
  end
end
|
166
|
+
|
167
|
+
# Runs the COPY statement for the given S3 object.
#
# Returns true on success. A RedshiftError whose message reports a failed
# table load is logged and turned into false; any other RedshiftError is
# re-raised.
def exec_copy(s3_uri)
  $log.debug format_log("start copying. s3_uri=#{s3_uri}")
  @redshift_connection.exec copy_sql(s3_uri)
  $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
  true
rescue RedshiftError => e
  raise e unless e.to_s =~ /^ERROR: Load into table '[^']+' failed\./

  $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), error:e.to_s
  false
end
|
181
|
+
|
182
|
+
# Returns the s3:// URI of the given key inside the configured bucket.
def s3_uri(path)
  's3://%s/%s' % [@s3_bucket, path]
end
|
185
|
+
|
186
|
+
# Fills the S3 URI into the COPY template, remembers the statement in
# @last_sql and returns it.
def copy_sql(s3_uri)
  statement = @copy_sql_template % s3_uri
  @last_sql = statement
  statement
end
|
189
|
+
|
190
|
+
# Appends the configured log suffix to the message, separated by a space.
# The message is returned unchanged when no suffix is set or it is empty.
def format_log(message)
  return message if !@log_suffix || @log_suffix.empty?

  "#{message} #{@log_suffix}"
end
|
197
|
+
|
198
|
+
# Always answers true.
# NOTE(review): presumably read by Fluentd to learn whether `format` emits
# msgpack; the flat file types emit plain text lines, so confirm this is intended.
def formatted_to_msgpack_binary
  true
end
|
201
|
+
|
202
|
+
private
|
203
|
+
|
204
|
+
# True when either a full access key pair or an IAM role is configured.
def check_credentials
  return true if @aws_key_id && @aws_sec_key
  return true if @aws_iam_role

  false
end
|
213
|
+
|
214
|
+
# Maps a file type to its default field delimiter: a tab for json, msgpack
# and tsv, a comma for csv.
#
# @raise [Fluent::ConfigError] for any other file type
def determine_delimiter(file_type)
  return "\t" if %w[json msgpack tsv].include?(file_type)
  return ',' if file_type == 'csv'

  raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
end
|
224
|
+
|
225
|
+
# Builds the COPY statement template. The S3 URI placeholder ('%s') is filled
# in later by copy_sql.
#
# The credentials clause uses the access key pair when both parts are set and
# the IAM role otherwise. ESCAPE is added for json/msgpack data unless the
# base options already contain it.
def build_redshift_copy_sql_template
  copy_columns = @redshift_copy_columns ? "(#{@redshift_copy_columns.join(",")})" : ''
  credentials =
    if @aws_key_id && @aws_sec_key
      "CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=#{@aws_sec_key}'"
    else
      "CREDENTIALS 'aws_iam_role=#{@aws_iam_role}'"
    end
  escape = !@redshift_copy_base_options.include?('ESCAPE') && (json? || msgpack?) ? " ESCAPE" : ''

  [
    "copy #{@table_name_with_schema}#{copy_columns}",
    "from '%s'",
    credentials,
    "delimiter '#{@delimiter}'",
    "GZIP#{escape}",
    @redshift_copy_base_options,
    "#{@redshift_copy_options};"
  ].join(' ')
end
|
244
|
+
|
245
|
+
# True when the configured file type is exactly "json".
def json?
  'json' == @file_type
end
|
248
|
+
|
249
|
+
# True when the configured file type is exactly "msgpack".
def msgpack?
  'msgpack' == @file_type
end
|
252
|
+
|
253
|
+
# Gzips the raw chunk contents into dst_file.
#
# The writer is closed by the block form, which finalizes the archive and
# closes dst_file. A failure while finalizing is raised rather than swallowed,
# so a truncated archive is never handed on for upload.
#
# @param dst_file the IO to write the gzip stream to
# @param chunk an object responding to write_to(io)
# @return dst_file
def create_gz_file_from_flat_data(dst_file, chunk)
  Zlib::GzipWriter.wrap(dst_file) { |gzw| chunk.write_to(gzw) }
  dst_file
end
|
263
|
+
|
264
|
+
# Converts each msgpack record of the chunk into one delimited line, ordered
# by the Redshift table's columns, and gzips the lines into dst_file.
#
# Returns dst_file, or nil when the table has no visible columns or no line
# was written. Records that fail to convert are logged and skipped.
#
# @raise [RuntimeError] when the column lookup returns nil
# @raise [Fluent::ConfigError] when redshift_copy_columns names a column the
#   table does not have
def create_gz_file_from_structured_data(dst_file, chunk)
  table_columns = fetch_table_columns
  raise "failed to fetch the redshift table definition." if table_columns.nil?

  if table_columns.empty?
    $log.warn format_log("no table on redshift or cannot access table. table_name=#{@table_name_with_schema}")
    return nil
  end

  if @redshift_copy_columns
    missing = @redshift_copy_columns - table_columns
    unless missing.empty?
      fail Fluent::ConfigError, "missing columns included in redshift_copy_columns - missing columns:\"#{missing.join(',')}\""
    end
  end

  writer = nil
  begin
    writer = Zlib::GzipWriter.new(dst_file)
    chunk.msgpack_each do |record|
      next unless record

      begin
        line = hash_to_table_text(record, table_columns)
        writer.write(line) if line && !line.empty?
      rescue => e
        text = record.is_a?(Hash) ? record[@record_log_tag] : record
        $log.error format_log("failed to create table text from #{@file_type}. text=(#{text})"), error:e.to_s
        $log.error_backtrace
      end
    end
    # Nothing was written, so there is nothing worth uploading.
    return nil unless writer.pos > 0
  ensure
    if writer
      begin
        writer.close
      rescue StandardError
        nil
      end
    end
  end
  dst_file
end
|
300
|
+
|
301
|
+
# Queries Redshift for the target table's columns and returns the
# 'column_name' value of every result row.
def fetch_table_columns
  sql = fetch_columns_sql
  @redshift_connection.exec(sql) do |rows|
    rows.map { |row| row['column_name'] }
  end
end
|
306
|
+
|
307
|
+
# Builds the INFORMATION_SCHEMA query that lists the table's columns in
# ordinal order. The query is restricted to the schema when one is configured.
# The statement is also stored in @last_sql.
def fetch_columns_sql
  conditions = ["table_name = '#{@redshift_tablename}'"]
  conditions << "table_schema = '#{@redshift_schemaname}'" if @redshift_schemaname
  @last_sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where #{conditions.join(' and ')} order by ordinal_position;"
end
|
314
|
+
|
315
|
+
# Turns one record into a delimited line whose fields follow the order of
# redshift_table_columns.
#
# Returns '' for a nil/false record. It also returns '' (after a warning) when
# the record has no value for any of the columns.
def hash_to_table_text(hash, redshift_table_columns)
  return '' unless hash

  values = redshift_table_columns.map { |column| hash[column] }
  return generate_line_with_delimiter(values, delimiter) unless values.all?(&:nil?)

  $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
  ''
end
|
329
|
+
|
330
|
+
# Renders one row: each value becomes a field, the fields are joined with the
# delimiter and the line is ended with a newline.
#
# nil becomes \N, an empty string stays empty, hashes and arrays are written
# as escaped JSON, and everything else is written as its escaped to_s.
def generate_line_with_delimiter(val_list, delimiter)
  fields = val_list.map do |val|
    if val.nil?
      "\\N"
    elsif '' == val
      ''
    elsif val.is_a?(Hash) || val.is_a?(Array)
      escape_text_for_copy(JSON.generate(val))
    else
      escape_text_for_copy(val.to_s)
    end
  end
  fields.join(delimiter) + "\n"
end
|
344
|
+
|
345
|
+
# Prefixes every backslash, tab and newline in val with a backslash.
def escape_text_for_copy(val)
  val.gsub(/[\\\t\n]/) { |special| "\\#{special}" }
end
|
348
|
+
|
349
|
+
end
|
350
|
+
|
351
|
+
# Error raised for Redshift failures. When it is built from a PG::Error, it
# keeps that error in pg_error and takes over its message and backtrace.
class RedshiftError < StandardError
  attr_accessor :pg_error

  # @param msg [String, PG::Error] a message, or the PG error to wrap
  def initialize(msg)
    unless msg.is_a?(PG::Error)
      super
      return
    end

    @pg_error = msg
    super(msg.to_s)
    set_backtrace(msg.backtrace)
  end
end
|
365
|
+
|
366
|
+
# Small wrapper around PG connections to Redshift.
#
# By default every exec opens a connection, runs the statement and closes the
# connection again. After connect_start, one connection is kept and reused
# until close is called.
class RedshiftConnection
  attr_reader :db_conf

  # @param db_conf [Hash] settings handed to PG::Connection.connect_start;
  #   :connect_timeout is also used as the wait limit while connecting
  def initialize(db_conf)
    @db_conf = db_conf
    @connection = nil
  end

  # Runs sql. With a block, the result is yielded to it.
  # PG errors are re-raised as RedshiftError.
  def exec(sql, &block)
    conn = @connection || create_redshift_connection
    return conn.exec(sql) unless block

    conn.exec(sql) { |result| block.call(result) }
  rescue PG::Error => e
    raise RedshiftError.new(e)
  ensure
    # Only connections opened for this single call are closed here.
    conn.close if conn && @connection.nil?
  end

  # Opens a connection that later exec calls will reuse.
  def connect_start
    @connection = create_redshift_connection
  end

  # Closes the kept connection, ignoring errors from the close itself.
  def close
    if @connection
      begin
        @connection.close
      rescue StandardError
        nil
      end
    end
    @connection = nil
  end

  private

  # Starts a connection and polls it until the handshake is finished, waiting
  # at most :connect_timeout seconds for each socket read or write.
  def create_redshift_connection
    conn = PG::Connection.connect_start(db_conf)
    raise RedshiftError.new("Unable to create a new connection.") unless conn
    raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ]) if conn.status == PG::CONNECTION_BAD

    socket = conn.socket_io
    finished = [PG::PGRES_POLLING_OK, PG::PGRES_POLLING_FAILED]
    poll_status = PG::PGRES_POLLING_WRITING
    until finished.include?(poll_status)
      if poll_status == PG::PGRES_POLLING_READING
        ready = IO.select([socket], nil, nil, db_conf[:connect_timeout])
        raise RedshiftError.new("Asynchronous connection timed out!(READING)") unless ready
      elsif poll_status == PG::PGRES_POLLING_WRITING
        ready = IO.select(nil, [socket], nil, db_conf[:connect_timeout])
        raise RedshiftError.new("Asynchronous connection timed out!(WRITING)") unless ready
      end
      poll_status = conn.connect_poll
    end

    unless conn.status == PG::CONNECTION_OK
      raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
    end

    conn
  rescue => e
    if conn
      begin
        conn.close
      rescue StandardError
        nil
      end
    end
    raise RedshiftError.new(e) if e.kind_of?(PG::Error)
    raise e
  end
end
|
metadata
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-redshift-v2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jun Yokoyama
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: test-unit
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: fakes3
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: fluentd
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: aws-sdk
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pg
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description: Amazon Redshift output plugin for Fluentd (inspired by fluent-plugin-redshift)
|
126
|
+
email:
|
127
|
+
- jun@larus.org
|
128
|
+
executables: []
|
129
|
+
extensions: []
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- ".gitignore"
|
133
|
+
- ".rspec"
|
134
|
+
- Gemfile
|
135
|
+
- Gemfile.lock
|
136
|
+
- README.md
|
137
|
+
- Rakefile
|
138
|
+
- bin/console
|
139
|
+
- bin/setup
|
140
|
+
- fluent-plugin-redshift-v2.gemspec
|
141
|
+
- lib/fluent/plugin/out_redshift_v2.rb
|
142
|
+
homepage: https://github.com/nysalor/fluent-plugin-redshift-v2
|
143
|
+
licenses: []
|
144
|
+
metadata: {}
|
145
|
+
post_install_message:
|
146
|
+
rdoc_options: []
|
147
|
+
require_paths:
|
148
|
+
- lib
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
requirements: []
|
160
|
+
rubyforge_project:
|
161
|
+
rubygems_version: 2.6.8
|
162
|
+
signing_key:
|
163
|
+
specification_version: 4
|
164
|
+
summary: Amazon Redshift output plugin for Fluentd (inspired by fluent-plugin-redshift)
|
165
|
+
test_files: []
|