fluent-plugin-s3-input 0.0.1 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +28 -26
- data/README.md +11 -0
- data/fluent-plugin-s3-input.gemspec +2 -1
- data/lib/fluent/plugin/out_s3_input.rb +88 -19
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2cd64e1194e16d1821e054cf9dde5cd870ac1475
|
4
|
+
data.tar.gz: d91e30a771fa8afa9000b216b777c3bbc5f3c6cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7425c4192cf17c805da883e8cd97ee86956a2f00992503f9e4c74895da7ce039665a941fac78aab879fe56323c39c29bc202feb2e3f8b000fe9a41023398c32a
|
7
|
+
data.tar.gz: f825c630de89ba79c732f97a85a230ce62d604ba3c81e988e8fdf729a82f9a69288004de65035e0b27a2bb8f1ba6e7d53c704e8dccdf4f22b23e5694139a5e7d
|
data/Gemfile.lock
CHANGED
@@ -1,48 +1,50 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-s3-input (0.0.
|
4
|
+
fluent-plugin-s3-input (0.0.11)
|
5
5
|
aws-sdk
|
6
6
|
fluentd
|
7
7
|
oj
|
8
|
+
rubyzip
|
8
9
|
|
9
10
|
GEM
|
10
11
|
remote: https://rubygems.org/
|
11
12
|
specs:
|
12
|
-
aws-sdk (2.
|
13
|
-
aws-sdk-resources (= 2.
|
14
|
-
aws-sdk-core (2.
|
13
|
+
aws-sdk (2.9.21)
|
14
|
+
aws-sdk-resources (= 2.9.21)
|
15
|
+
aws-sdk-core (2.9.21)
|
16
|
+
aws-sigv4 (~> 1.0)
|
15
17
|
jmespath (~> 1.0)
|
16
|
-
aws-sdk-resources (2.
|
17
|
-
aws-sdk-core (= 2.
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
aws-sdk-resources (2.9.21)
|
19
|
+
aws-sdk-core (= 2.9.21)
|
20
|
+
aws-sigv4 (1.0.0)
|
21
|
+
cool.io (1.5.0)
|
22
|
+
fluentd (0.14.16)
|
23
|
+
cool.io (>= 1.4.5, < 2.0.0)
|
21
24
|
http_parser.rb (>= 0.5.1, < 0.7.0)
|
22
|
-
|
23
|
-
|
24
|
-
serverengine (>= 1.6.4)
|
25
|
+
msgpack (>= 0.7.0, < 2.0.0)
|
26
|
+
serverengine (>= 2.0.4, < 3.0.0)
|
25
27
|
sigdump (~> 0.2.2)
|
26
|
-
strptime (
|
27
|
-
tzinfo (
|
28
|
-
tzinfo-data (
|
28
|
+
strptime (~> 0.1.7)
|
29
|
+
tzinfo (~> 1.0)
|
30
|
+
tzinfo-data (~> 1.0)
|
29
31
|
yajl-ruby (~> 1.0)
|
30
32
|
http_parser.rb (0.6.0)
|
31
|
-
jmespath (1.3.
|
32
|
-
|
33
|
-
|
34
|
-
oj (2.17.1)
|
33
|
+
jmespath (1.3.1)
|
34
|
+
msgpack (1.1.0)
|
35
|
+
oj (3.0.9)
|
35
36
|
rake (11.2.2)
|
36
|
-
|
37
|
+
rubyzip (1.2.1)
|
38
|
+
serverengine (2.0.5)
|
37
39
|
sigdump (~> 0.2.2)
|
38
40
|
sigdump (0.2.4)
|
39
|
-
strptime (0.1.
|
40
|
-
thread_safe (0.3.
|
41
|
-
tzinfo (1.2.
|
41
|
+
strptime (0.1.9)
|
42
|
+
thread_safe (0.3.6)
|
43
|
+
tzinfo (1.2.3)
|
42
44
|
thread_safe (~> 0.1)
|
43
|
-
tzinfo-data (1.
|
45
|
+
tzinfo-data (1.2017.2)
|
44
46
|
tzinfo (>= 1.0.0)
|
45
|
-
yajl-ruby (1.
|
47
|
+
yajl-ruby (1.3.0)
|
46
48
|
|
47
49
|
PLATFORMS
|
48
50
|
ruby
|
@@ -52,4 +54,4 @@ DEPENDENCIES
|
|
52
54
|
rake
|
53
55
|
|
54
56
|
BUNDLED WITH
|
55
|
-
1.
|
57
|
+
1.14.6
|
data/README.md
CHANGED
@@ -45,6 +45,7 @@ S3 Event Example Intake
|
|
45
45
|
# this plugin!
|
46
46
|
<match sqs.s3.event>
|
47
47
|
type s3_input
|
48
|
+
merge_record no
|
48
49
|
s3_bucket_key s3_bucket
|
49
50
|
s3_object_key_key s3_object
|
50
51
|
uncompress gzip
|
@@ -57,3 +58,13 @@ S3 Event Example Intake
|
|
57
58
|
split_key Records
|
58
59
|
tag cloudtrail
|
59
60
|
</match>
|
61
|
+
# params
|
62
|
+
tag my.new.tag : tag name to emit new record as
|
63
|
+
uncompress gzip : decompression algorithm (only gzip:-/)
|
64
|
+
s3_bucket_key my_s3_bucket : The name of your S3 bucket
|
65
|
+
s3_object_key_key /some/cool/object : The path to your S3 object
|
66
|
+
merge_record yes|no : Do we merge or replace the input record
|
67
|
+
remove_keys key1, key2 : keys that we remove after reading the s3 object
|
68
|
+
compression_exts gz, zip : extensions that we uncompress. Allows you to ingest both compressed and uncompressed files
|
69
|
+
record_key : if set, the record will be placed in this key
|
70
|
+
|
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-s3-input"
|
7
|
-
spec.version = "0.0.
|
7
|
+
spec.version = "0.0.11"
|
8
8
|
spec.authors = ["Anthony Johnson"]
|
9
9
|
spec.email = ["ansoni@gmail.com"]
|
10
10
|
spec.description = %q{Fluentd plugin to read a file from S3 and emit it}
|
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency "fluentd"
|
22
22
|
spec.add_runtime_dependency "aws-sdk"
|
23
23
|
spec.add_runtime_dependency "oj"
|
24
|
+
spec.add_runtime_dependency "rubyzip"
|
24
25
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json'
|
3
|
+
require 'zip'
|
1
4
|
|
2
5
|
module Fluent
|
3
6
|
class S3InputOutput < Output
|
@@ -14,8 +17,14 @@ module Fluent
|
|
14
17
|
config_param :s3_bucket_key
|
15
18
|
config_param :s3_object_key_key
|
16
19
|
config_param :tag
|
17
|
-
|
18
|
-
config_param :
|
20
|
+
config_param :merge_record, :bool, :default => false
|
21
|
+
config_param :record_key, :string, :default => nil
|
22
|
+
config_param :remove_keys, :array, :default => []
|
23
|
+
config_param :time_keys, :array, :default => []
|
24
|
+
config_param :time_format, :string, :default => "%Y-%m-%dT%H:%M:%S"
|
25
|
+
config_param :gzip_exts, :array, :default => []
|
26
|
+
config_param :zip_exts, :array, :default => []
|
27
|
+
config_param :format, :string, :default => 'json'
|
19
28
|
|
20
29
|
attr_accessor :s3
|
21
30
|
|
@@ -28,6 +37,7 @@ module Fluent
|
|
28
37
|
|
29
38
|
def configure(conf)
|
30
39
|
super
|
40
|
+
|
31
41
|
if @aws_key_id and @aws_sec_key
|
32
42
|
@s3 = Aws::S3::Client.new(
|
33
43
|
region: "us-east-1",
|
@@ -35,27 +45,86 @@ module Fluent
|
|
35
45
|
secret_access_key: @aws_sec_key,
|
36
46
|
)
|
37
47
|
else
|
38
|
-
@s3 = Aws::S3::Client.new(
|
48
|
+
@s3 = Aws::S3::Client.new()
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Allow JSON data in a couple of formats
|
53
|
+
# {} single event
|
54
|
+
# [{},{}] array of events
|
55
|
+
# {}\n{}\n{} concatenated events (flume)
|
56
|
+
def normalize_json(json)
|
57
|
+
if json[0] != "["
|
58
|
+
json=json.gsub /}\n{/,"},{"
|
59
|
+
json="[#{json}]"
|
39
60
|
end
|
61
|
+
json
|
40
62
|
end
|
41
63
|
|
42
64
|
def emit(tag, es, chain)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
65
|
+
begin
|
66
|
+
tag_parts = tag.split('.')
|
67
|
+
es.each { |time, record|
|
68
|
+
s3_bucket = record[s3_bucket_key]
|
69
|
+
s3_key = record[s3_object_key_key]
|
70
|
+
s3_key_ext = s3_key.split(".")[-1]
|
71
|
+
resp = s3.get_object(bucket: s3_bucket, key: s3_key)
|
72
|
+
|
73
|
+
if @gzip_exts.include?(s3_key_ext)
|
74
|
+
input = Zlib::GzipReader.new(resp.body)
|
75
|
+
elsif @zip_exts.include?(s3_key_ext)
|
76
|
+
io = Zip::InputStream.new(resp.body)
|
77
|
+
input = io.get_next_entry
|
78
|
+
#input = Zip::File.open(resp.body).entries.first.get_input_stream
|
79
|
+
else
|
80
|
+
input = resp.body
|
81
|
+
end
|
82
|
+
|
83
|
+
new_record = {}
|
84
|
+
if @merge_record
|
85
|
+
new_record = {}.merge(record)
|
86
|
+
end
|
87
|
+
|
88
|
+
s3_record = {}
|
89
|
+
if @format == 'json'
|
90
|
+
json_data=normalize_json input.read
|
91
|
+
s3_record = Oj.load(json_data)
|
92
|
+
elsif @format == 'csv'
|
93
|
+
data = input.read
|
94
|
+
File.open("/tmp/s3debug", 'w') { |file| file.write(data) }
|
95
|
+
s3_record=CSV.parse(data).to_json
|
96
|
+
else
|
97
|
+
raise "Unsupported format - #{@format}"
|
98
|
+
end
|
99
|
+
|
100
|
+
# parse the time from the record
|
101
|
+
@time_keys.each do |time_key|
|
102
|
+
puts "Look for #{time_key} in #{new_record}"
|
103
|
+
if s3_record.include? time_key
|
104
|
+
puts "Reset time for #{time_key}"
|
105
|
+
time=Time.strptime(new_record[time_key], @time_format).to_i
|
106
|
+
puts "Setting time to #{time}"
|
107
|
+
break
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
if @record_key == nil
|
112
|
+
tmp_record=s3_record.merge(new_record)
|
113
|
+
new_record=tmp_record
|
114
|
+
else
|
115
|
+
new_record[record_key]=s3_record
|
116
|
+
end
|
117
|
+
|
118
|
+
@remove_keys.each do |key_to_remove|
|
119
|
+
new_record.delete(key_to_remove)
|
120
|
+
end
|
121
|
+
|
122
|
+
router.emit(@tag, time, new_record)
|
123
|
+
}
|
124
|
+
chain.next
|
125
|
+
rescue StandardError => e
|
126
|
+
$log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
|
127
|
+
end
|
59
128
|
end
|
60
129
|
end
|
61
130
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-s3-input
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Johnson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubyzip
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description: Fluentd plugin to read a file from S3 and emit it
|
70
84
|
email:
|
71
85
|
- ansoni@gmail.com
|