fluent-plugin-s3-input 0.0.1 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: abd2bf6ecf2453a5d47cf8702ff3fe95f50dbf79
4
- data.tar.gz: 9c73ec74af0ef99f899d0dec6e225ee4abd105dc
3
+ metadata.gz: 2cd64e1194e16d1821e054cf9dde5cd870ac1475
4
+ data.tar.gz: d91e30a771fa8afa9000b216b777c3bbc5f3c6cf
5
5
  SHA512:
6
- metadata.gz: feed1255cafe0de56b2a0df87aa82093e2b07f87a5372f51512527ad4bb2db3c37177e017908aca09f041499d3e70232436101bb1b429c116a27d95ef94da3eb
7
- data.tar.gz: 664811196401b7a22cb7078ad39de99a27f865dd7602d351327c595c991e7f54bbf923e7199b078871a5baf927f3646b42b61bb4e08c1204ace0f552f3c04def
6
+ metadata.gz: 7425c4192cf17c805da883e8cd97ee86956a2f00992503f9e4c74895da7ce039665a941fac78aab879fe56323c39c29bc202feb2e3f8b000fe9a41023398c32a
7
+ data.tar.gz: f825c630de89ba79c732f97a85a230ce62d604ba3c81e988e8fdf729a82f9a69288004de65035e0b27a2bb8f1ba6e7d53c704e8dccdf4f22b23e5694139a5e7d
data/Gemfile.lock CHANGED
@@ -1,48 +1,50 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-s3-input (0.0.1)
4
+ fluent-plugin-s3-input (0.0.11)
5
5
  aws-sdk
6
6
  fluentd
7
7
  oj
8
+ rubyzip
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
11
12
  specs:
12
- aws-sdk (2.3.22)
13
- aws-sdk-resources (= 2.3.22)
14
- aws-sdk-core (2.3.22)
13
+ aws-sdk (2.9.21)
14
+ aws-sdk-resources (= 2.9.21)
15
+ aws-sdk-core (2.9.21)
16
+ aws-sigv4 (~> 1.0)
15
17
  jmespath (~> 1.0)
16
- aws-sdk-resources (2.3.22)
17
- aws-sdk-core (= 2.3.22)
18
- cool.io (1.4.4)
19
- fluentd (0.14.1)
20
- cool.io (>= 1.4.3, < 2.0.0)
18
+ aws-sdk-resources (2.9.21)
19
+ aws-sdk-core (= 2.9.21)
20
+ aws-sigv4 (1.0.0)
21
+ cool.io (1.5.0)
22
+ fluentd (0.14.16)
23
+ cool.io (>= 1.4.5, < 2.0.0)
21
24
  http_parser.rb (>= 0.5.1, < 0.7.0)
22
- json (>= 1.4.3)
23
- msgpack (>= 0.7.0)
24
- serverengine (>= 1.6.4)
25
+ msgpack (>= 0.7.0, < 2.0.0)
26
+ serverengine (>= 2.0.4, < 3.0.0)
25
27
  sigdump (~> 0.2.2)
26
- strptime (>= 0.1.7)
27
- tzinfo (>= 1.0.0)
28
- tzinfo-data (>= 1.0.0)
28
+ strptime (~> 0.1.7)
29
+ tzinfo (~> 1.0)
30
+ tzinfo-data (~> 1.0)
29
31
  yajl-ruby (~> 1.0)
30
32
  http_parser.rb (0.6.0)
31
- jmespath (1.3.0)
32
- json (2.0.1)
33
- msgpack (1.0.0)
34
- oj (2.17.1)
33
+ jmespath (1.3.1)
34
+ msgpack (1.1.0)
35
+ oj (3.0.9)
35
36
  rake (11.2.2)
36
- serverengine (1.6.4)
37
+ rubyzip (1.2.1)
38
+ serverengine (2.0.5)
37
39
  sigdump (~> 0.2.2)
38
40
  sigdump (0.2.4)
39
- strptime (0.1.8)
40
- thread_safe (0.3.5)
41
- tzinfo (1.2.2)
41
+ strptime (0.1.9)
42
+ thread_safe (0.3.6)
43
+ tzinfo (1.2.3)
42
44
  thread_safe (~> 0.1)
43
- tzinfo-data (1.2016.6)
45
+ tzinfo-data (1.2017.2)
44
46
  tzinfo (>= 1.0.0)
45
- yajl-ruby (1.2.1)
47
+ yajl-ruby (1.3.0)
46
48
 
47
49
  PLATFORMS
48
50
  ruby
@@ -52,4 +54,4 @@ DEPENDENCIES
52
54
  rake
53
55
 
54
56
  BUNDLED WITH
55
- 1.12.5
57
+ 1.14.6
data/README.md CHANGED
@@ -45,6 +45,7 @@ S3 Event Example Intake
45
45
  # this plugin!
46
46
  <match sqs.s3.event>
47
47
  type s3_input
48
+ merge_record no
48
49
  s3_bucket_key s3_bucket
49
50
  s3_object_key_key s3_object
50
51
  uncompress gzip
@@ -57,3 +58,13 @@ S3 Event Example Intake
57
58
  split_key Records
58
59
  tag cloudtrail
59
60
  </match>
61
+ # params
62
+ tag my.new.tag : tag name to emit new record as
63
+ uncompress gzip : decompression algorithm (only gzip is supported)
64
+ s3_bucket_key my_s3_bucket : The key in the input record that holds the name of your S3 bucket
65
+ s3_object_key_key /some/cool/object : The key in the input record that holds the path to your S3 object
66
+ merge_record yes|no : Whether the input record is merged into the emitted record (yes) or replaced by the S3 object's data (no)
67
+ remove_keys key1, key2 : keys that we remove after reading the s3 object
68
+ compression_exts gz, zip : extensions that we uncompress. Allows you to ingest both compressed and uncompressed files
69
+ record_key : if set, the data read from the S3 object will be placed under this key in the emitted record
70
+
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-s3-input"
7
- spec.version = "0.0.1"
7
+ spec.version = "0.0.11"
8
8
  spec.authors = ["Anthony Johnson"]
9
9
  spec.email = ["ansoni@gmail.com"]
10
10
  spec.description = %q{Fluentd plugin to read a file from S3 and emit it}
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency "fluentd"
22
22
  spec.add_runtime_dependency "aws-sdk"
23
23
  spec.add_runtime_dependency "oj"
24
+ spec.add_runtime_dependency "rubyzip"
24
25
  end
@@ -1,3 +1,6 @@
1
+ require 'csv'
2
+ require 'json'
3
+ require 'zip'
1
4
 
2
5
  module Fluent
3
6
  class S3InputOutput < Output
@@ -14,8 +17,14 @@ module Fluent
14
17
  config_param :s3_bucket_key
15
18
  config_param :s3_object_key_key
16
19
  config_param :tag
17
- # supports: gzip
18
- config_param :uncompress, :string
20
+ config_param :merge_record, :bool, :default => false
21
+ config_param :record_key, :string, :default => nil
22
+ config_param :remove_keys, :array, :default => []
23
+ config_param :time_keys, :array, :default => []
24
+ config_param :time_format, :string, :default => "%Y-%m-%dT%H:%M:%S"
25
+ config_param :gzip_exts, :array, :default => []
26
+ config_param :zip_exts, :array, :default => []
27
+ config_param :format, :string, :default => 'json'
19
28
 
20
29
  attr_accessor :s3
21
30
 
@@ -28,6 +37,7 @@ module Fluent
28
37
 
29
38
  def configure(conf)
30
39
  super
40
+
31
41
  if @aws_key_id and @aws_sec_key
32
42
  @s3 = Aws::S3::Client.new(
33
43
  region: "us-east-1",
@@ -35,27 +45,86 @@ module Fluent
35
45
  secret_access_key: @aws_sec_key,
36
46
  )
37
47
  else
38
- @s3 = Aws::S3::Client.new(region: "us-east-1")
48
+ @s3 = Aws::S3::Client.new()
49
+ end
50
+ end
51
+
52
+ # Allow JSON data in a couple of formats
53
+ # {} single event
54
+ # [{},{}] array of events
55
+ # {}\n{}\n{} concatenated events (flume)
56
+ def normalize_json(json)
57
+ if json[0] != "["
58
+ json=json.gsub /}\n{/,"},{"
59
+ json="[#{json}]"
39
60
  end
61
+ json
40
62
  end
41
63
 
42
64
  def emit(tag, es, chain)
43
- tag_parts = tag.split('.')
44
- es.each { |time, record|
45
- s3_bucket = record[s3_bucket_key]
46
- s3_key = record[s3_object_key_key]
47
- resp = s3.get_object(bucket: s3_bucket, key: s3_key)
48
- if @uncompress && @uncompress == "gzip"
49
- input = Zlib::GzipReader.new(resp.body)
50
- else
51
- input = resp.body
52
- end
53
- new_record = Oj.load(input.read)
54
- router.emit(@tag, time, new_record)
55
- }
56
- chain.next
57
- rescue => e
58
- $log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
65
+ begin
66
+ tag_parts = tag.split('.')
67
+ es.each { |time, record|
68
+ s3_bucket = record[s3_bucket_key]
69
+ s3_key = record[s3_object_key_key]
70
+ s3_key_ext = s3_key.split(".")[-1]
71
+ resp = s3.get_object(bucket: s3_bucket, key: s3_key)
72
+
73
+ if @gzip_exts.include?(s3_key_ext)
74
+ input = Zlib::GzipReader.new(resp.body)
75
+ elsif @zip_exts.include?(s3_key_ext)
76
+ io = Zip::InputStream.new(resp.body)
77
+ input = io.get_next_entry
78
+ #input = Zip::File.open(resp.body).entries.first.get_input_stream
79
+ else
80
+ input = resp.body
81
+ end
82
+
83
+ new_record = {}
84
+ if @merge_record
85
+ new_record = {}.merge(record)
86
+ end
87
+
88
+ s3_record = {}
89
+ if @format == 'json'
90
+ json_data=normalize_json input.read
91
+ s3_record = Oj.load(json_data)
92
+ elsif @format == 'csv'
93
+ data = input.read
94
+ File.open("/tmp/s3debug", 'w') { |file| file.write(data) }
95
+ s3_record=CSV.parse(data).to_json
96
+ else
97
+ raise "Unsupported format - #{@format}"
98
+ end
99
+
100
+ # parse the time from the record
101
+ @time_keys.each do |time_key|
102
+ puts "Look for #{time_key} in #{new_record}"
103
+ if s3_record.include? time_key
104
+ puts "Reset time for #{time_key}"
105
+ time=Time.strptime(new_record[time_key], @time_format).to_i
106
+ puts "Setting time to #{time}"
107
+ break
108
+ end
109
+ end
110
+
111
+ if @record_key == nil
112
+ tmp_record=s3_record.merge(new_record)
113
+ new_record=tmp_record
114
+ else
115
+ new_record[record_key]=s3_record
116
+ end
117
+
118
+ @remove_keys.each do |key_to_remove|
119
+ new_record.delete(key_to_remove)
120
+ end
121
+
122
+ router.emit(@tag, time, new_record)
123
+ }
124
+ chain.next
125
+ rescue StandardError => e
126
+ $log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
127
+ end
59
128
  end
60
129
  end
61
130
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3-input
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Anthony Johnson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-18 00:00:00.000000000 Z
11
+ date: 2017-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description: Fluentd plugin to read a file from S3 and emit it
70
84
  email:
71
85
  - ansoni@gmail.com