fluent-plugin-s3-input 0.0.1 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +28 -26
- data/README.md +11 -0
- data/fluent-plugin-s3-input.gemspec +2 -1
- data/lib/fluent/plugin/out_s3_input.rb +88 -19
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2cd64e1194e16d1821e054cf9dde5cd870ac1475
|
4
|
+
data.tar.gz: d91e30a771fa8afa9000b216b777c3bbc5f3c6cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7425c4192cf17c805da883e8cd97ee86956a2f00992503f9e4c74895da7ce039665a941fac78aab879fe56323c39c29bc202feb2e3f8b000fe9a41023398c32a
|
7
|
+
data.tar.gz: f825c630de89ba79c732f97a85a230ce62d604ba3c81e988e8fdf729a82f9a69288004de65035e0b27a2bb8f1ba6e7d53c704e8dccdf4f22b23e5694139a5e7d
|
data/Gemfile.lock
CHANGED
@@ -1,48 +1,50 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-s3-input (0.0.1)
|
4
|
+
fluent-plugin-s3-input (0.0.11)
|
5
5
|
aws-sdk
|
6
6
|
fluentd
|
7
7
|
oj
|
8
|
+
rubyzip
|
8
9
|
|
9
10
|
GEM
|
10
11
|
remote: https://rubygems.org/
|
11
12
|
specs:
|
12
|
-
aws-sdk (2.
|
13
|
-
aws-sdk-resources (= 2.
|
14
|
-
aws-sdk-core (2.
|
13
|
+
aws-sdk (2.9.21)
|
14
|
+
aws-sdk-resources (= 2.9.21)
|
15
|
+
aws-sdk-core (2.9.21)
|
16
|
+
aws-sigv4 (~> 1.0)
|
15
17
|
jmespath (~> 1.0)
|
16
|
-
aws-sdk-resources (2.
|
17
|
-
aws-sdk-core (= 2.
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
aws-sdk-resources (2.9.21)
|
19
|
+
aws-sdk-core (= 2.9.21)
|
20
|
+
aws-sigv4 (1.0.0)
|
21
|
+
cool.io (1.5.0)
|
22
|
+
fluentd (0.14.16)
|
23
|
+
cool.io (>= 1.4.5, < 2.0.0)
|
21
24
|
http_parser.rb (>= 0.5.1, < 0.7.0)
|
22
|
-
|
23
|
-
|
24
|
-
serverengine (>= 1.6.4)
|
25
|
+
msgpack (>= 0.7.0, < 2.0.0)
|
26
|
+
serverengine (>= 2.0.4, < 3.0.0)
|
25
27
|
sigdump (~> 0.2.2)
|
26
|
-
strptime (
|
27
|
-
tzinfo (
|
28
|
-
tzinfo-data (
|
28
|
+
strptime (~> 0.1.7)
|
29
|
+
tzinfo (~> 1.0)
|
30
|
+
tzinfo-data (~> 1.0)
|
29
31
|
yajl-ruby (~> 1.0)
|
30
32
|
http_parser.rb (0.6.0)
|
31
|
-
jmespath (1.3.
|
32
|
-
|
33
|
-
|
34
|
-
oj (2.17.1)
|
33
|
+
jmespath (1.3.1)
|
34
|
+
msgpack (1.1.0)
|
35
|
+
oj (3.0.9)
|
35
36
|
rake (11.2.2)
|
36
|
-
|
37
|
+
rubyzip (1.2.1)
|
38
|
+
serverengine (2.0.5)
|
37
39
|
sigdump (~> 0.2.2)
|
38
40
|
sigdump (0.2.4)
|
39
|
-
strptime (0.1.
|
40
|
-
thread_safe (0.3.
|
41
|
-
tzinfo (1.2.
|
41
|
+
strptime (0.1.9)
|
42
|
+
thread_safe (0.3.6)
|
43
|
+
tzinfo (1.2.3)
|
42
44
|
thread_safe (~> 0.1)
|
43
|
-
tzinfo-data (1.
|
45
|
+
tzinfo-data (1.2017.2)
|
44
46
|
tzinfo (>= 1.0.0)
|
45
|
-
yajl-ruby (1.
|
47
|
+
yajl-ruby (1.3.0)
|
46
48
|
|
47
49
|
PLATFORMS
|
48
50
|
ruby
|
@@ -52,4 +54,4 @@ DEPENDENCIES
|
|
52
54
|
rake
|
53
55
|
|
54
56
|
BUNDLED WITH
|
55
|
-
1.
|
57
|
+
1.14.6
|
data/README.md
CHANGED
@@ -45,6 +45,7 @@ S3 Event Example Intake
|
|
45
45
|
# this plugin!
|
46
46
|
<match sqs.s3.event>
|
47
47
|
type s3_input
|
48
|
+
merge_record no
|
48
49
|
s3_bucket_key s3_bucket
|
49
50
|
s3_object_key_key s3_object
|
50
51
|
uncompress gzip
|
@@ -57,3 +58,13 @@ S3 Event Example Intake
|
|
57
58
|
split_key Records
|
58
59
|
tag cloudtrail
|
59
60
|
</match>
|
61
|
+
# params
|
62
|
+
tag my.new.tag : tag name to emit new record as
|
63
|
+
uncompress gzip : decompression algorithm (only gzip:-/)
|
64
|
+
s3_bucket_key my_s3_bucket : The name of your S3 bucket
|
65
|
+
s3_object_key_key /some/cool/object : The path to your S3 object
|
66
|
+
merge_record yes|no : Do we merge or replace the input record
|
67
|
+
remove_keys key1, key2 : keys that we remove after reading the s3 object
|
68
|
+
compression_exts gz, zip : extensions that we uncompress. Allows you to ingest both compressed and uncompressed files
|
69
|
+
record_key : if set, the record will be placed in this key
|
70
|
+
|
data/fluent-plugin-s3-input.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-s3-input"
|
7
|
-
spec.version = "0.0.1"
|
7
|
+
spec.version = "0.0.11"
|
8
8
|
spec.authors = ["Anthony Johnson"]
|
9
9
|
spec.email = ["ansoni@gmail.com"]
|
10
10
|
spec.description = %q{Fluentd plugin to read a file from S3 and emit it}
|
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency "fluentd"
|
22
22
|
spec.add_runtime_dependency "aws-sdk"
|
23
23
|
spec.add_runtime_dependency "oj"
|
24
|
+
spec.add_runtime_dependency "rubyzip"
|
24
25
|
end
|
data/lib/fluent/plugin/out_s3_input.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json'
|
3
|
+
require 'zip'
|
1
4
|
|
2
5
|
module Fluent
|
3
6
|
class S3InputOutput < Output
|
@@ -14,8 +17,14 @@ module Fluent
|
|
14
17
|
config_param :s3_bucket_key
|
15
18
|
config_param :s3_object_key_key
|
16
19
|
config_param :tag
|
17
|
-
|
18
|
-
config_param :
|
20
|
+
config_param :merge_record, :bool, :default => false
|
21
|
+
config_param :record_key, :string, :default => nil
|
22
|
+
config_param :remove_keys, :array, :default => []
|
23
|
+
config_param :time_keys, :array, :default => []
|
24
|
+
config_param :time_format, :string, :default => "%Y-%m-%dT%H:%M:%S"
|
25
|
+
config_param :gzip_exts, :array, :default => []
|
26
|
+
config_param :zip_exts, :array, :default => []
|
27
|
+
config_param :format, :string, :default => 'json'
|
19
28
|
|
20
29
|
attr_accessor :s3
|
21
30
|
|
@@ -28,6 +37,7 @@ module Fluent
|
|
28
37
|
|
29
38
|
def configure(conf)
|
30
39
|
super
|
40
|
+
|
31
41
|
if @aws_key_id and @aws_sec_key
|
32
42
|
@s3 = Aws::S3::Client.new(
|
33
43
|
region: "us-east-1",
|
@@ -35,27 +45,86 @@ module Fluent
|
|
35
45
|
secret_access_key: @aws_sec_key,
|
36
46
|
)
|
37
47
|
else
|
38
|
-
@s3 = Aws::S3::Client.new(
|
48
|
+
@s3 = Aws::S3::Client.new()
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Allow JSON data in a couple of formats
|
53
|
+
# {} single event
|
54
|
+
# [{},{}] array of events
|
55
|
+
# {}\n{}\n{} concatenated events (flume)
|
56
|
+
def normalize_json(json)
|
57
|
+
if json[0] != "["
|
58
|
+
json=json.gsub /}\n{/,"},{"
|
59
|
+
json="[#{json}]"
|
39
60
|
end
|
61
|
+
json
|
40
62
|
end
|
41
63
|
|
42
64
|
def emit(tag, es, chain)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
65
|
+
begin
|
66
|
+
tag_parts = tag.split('.')
|
67
|
+
es.each { |time, record|
|
68
|
+
s3_bucket = record[s3_bucket_key]
|
69
|
+
s3_key = record[s3_object_key_key]
|
70
|
+
s3_key_ext = s3_key.split(".")[-1]
|
71
|
+
resp = s3.get_object(bucket: s3_bucket, key: s3_key)
|
72
|
+
|
73
|
+
if @gzip_exts.include?(s3_key_ext)
|
74
|
+
input = Zlib::GzipReader.new(resp.body)
|
75
|
+
elsif @zip_exts.include?(s3_key_ext)
|
76
|
+
io = Zip::InputStream.new(resp.body)
|
77
|
+
input = io.get_next_entry
|
78
|
+
#input = Zip::File.open(resp.body).entries.first.get_input_stream
|
79
|
+
else
|
80
|
+
input = resp.body
|
81
|
+
end
|
82
|
+
|
83
|
+
new_record = {}
|
84
|
+
if @merge_record
|
85
|
+
new_record = {}.merge(record)
|
86
|
+
end
|
87
|
+
|
88
|
+
s3_record = {}
|
89
|
+
if @format == 'json'
|
90
|
+
json_data=normalize_json input.read
|
91
|
+
s3_record = Oj.load(json_data)
|
92
|
+
elsif @format == 'csv'
|
93
|
+
data = input.read
|
94
|
+
File.open("/tmp/s3debug", 'w') { |file| file.write(data) }
|
95
|
+
s3_record=CSV.parse(data).to_json
|
96
|
+
else
|
97
|
+
raise "Unsupported format - #{@format}"
|
98
|
+
end
|
99
|
+
|
100
|
+
# parse the time from the record
|
101
|
+
@time_keys.each do |time_key|
|
102
|
+
puts "Look for #{time_key} in #{new_record}"
|
103
|
+
if s3_record.include? time_key
|
104
|
+
puts "Reset time for #{time_key}"
|
105
|
+
time=Time.strptime(new_record[time_key], @time_format).to_i
|
106
|
+
puts "Setting time to #{time}"
|
107
|
+
break
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
if @record_key == nil
|
112
|
+
tmp_record=s3_record.merge(new_record)
|
113
|
+
new_record=tmp_record
|
114
|
+
else
|
115
|
+
new_record[record_key]=s3_record
|
116
|
+
end
|
117
|
+
|
118
|
+
@remove_keys.each do |key_to_remove|
|
119
|
+
new_record.delete(key_to_remove)
|
120
|
+
end
|
121
|
+
|
122
|
+
router.emit(@tag, time, new_record)
|
123
|
+
}
|
124
|
+
chain.next
|
125
|
+
rescue StandardError => e
|
126
|
+
$log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
|
127
|
+
end
|
59
128
|
end
|
60
129
|
end
|
61
130
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-s3-input
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Johnson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubyzip
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description: Fluentd plugin to read a file from S3 and emit it
|
70
84
|
email:
|
71
85
|
- ansoni@gmail.com
|