fluent-plugin-s3-input 0.0.1 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: abd2bf6ecf2453a5d47cf8702ff3fe95f50dbf79
4
- data.tar.gz: 9c73ec74af0ef99f899d0dec6e225ee4abd105dc
3
+ metadata.gz: 2cd64e1194e16d1821e054cf9dde5cd870ac1475
4
+ data.tar.gz: d91e30a771fa8afa9000b216b777c3bbc5f3c6cf
5
5
  SHA512:
6
- metadata.gz: feed1255cafe0de56b2a0df87aa82093e2b07f87a5372f51512527ad4bb2db3c37177e017908aca09f041499d3e70232436101bb1b429c116a27d95ef94da3eb
7
- data.tar.gz: 664811196401b7a22cb7078ad39de99a27f865dd7602d351327c595c991e7f54bbf923e7199b078871a5baf927f3646b42b61bb4e08c1204ace0f552f3c04def
6
+ metadata.gz: 7425c4192cf17c805da883e8cd97ee86956a2f00992503f9e4c74895da7ce039665a941fac78aab879fe56323c39c29bc202feb2e3f8b000fe9a41023398c32a
7
+ data.tar.gz: f825c630de89ba79c732f97a85a230ce62d604ba3c81e988e8fdf729a82f9a69288004de65035e0b27a2bb8f1ba6e7d53c704e8dccdf4f22b23e5694139a5e7d
data/Gemfile.lock CHANGED
@@ -1,48 +1,50 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-s3-input (0.0.1)
4
+ fluent-plugin-s3-input (0.0.11)
5
5
  aws-sdk
6
6
  fluentd
7
7
  oj
8
+ rubyzip
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
11
12
  specs:
12
- aws-sdk (2.3.22)
13
- aws-sdk-resources (= 2.3.22)
14
- aws-sdk-core (2.3.22)
13
+ aws-sdk (2.9.21)
14
+ aws-sdk-resources (= 2.9.21)
15
+ aws-sdk-core (2.9.21)
16
+ aws-sigv4 (~> 1.0)
15
17
  jmespath (~> 1.0)
16
- aws-sdk-resources (2.3.22)
17
- aws-sdk-core (= 2.3.22)
18
- cool.io (1.4.4)
19
- fluentd (0.14.1)
20
- cool.io (>= 1.4.3, < 2.0.0)
18
+ aws-sdk-resources (2.9.21)
19
+ aws-sdk-core (= 2.9.21)
20
+ aws-sigv4 (1.0.0)
21
+ cool.io (1.5.0)
22
+ fluentd (0.14.16)
23
+ cool.io (>= 1.4.5, < 2.0.0)
21
24
  http_parser.rb (>= 0.5.1, < 0.7.0)
22
- json (>= 1.4.3)
23
- msgpack (>= 0.7.0)
24
- serverengine (>= 1.6.4)
25
+ msgpack (>= 0.7.0, < 2.0.0)
26
+ serverengine (>= 2.0.4, < 3.0.0)
25
27
  sigdump (~> 0.2.2)
26
- strptime (>= 0.1.7)
27
- tzinfo (>= 1.0.0)
28
- tzinfo-data (>= 1.0.0)
28
+ strptime (~> 0.1.7)
29
+ tzinfo (~> 1.0)
30
+ tzinfo-data (~> 1.0)
29
31
  yajl-ruby (~> 1.0)
30
32
  http_parser.rb (0.6.0)
31
- jmespath (1.3.0)
32
- json (2.0.1)
33
- msgpack (1.0.0)
34
- oj (2.17.1)
33
+ jmespath (1.3.1)
34
+ msgpack (1.1.0)
35
+ oj (3.0.9)
35
36
  rake (11.2.2)
36
- serverengine (1.6.4)
37
+ rubyzip (1.2.1)
38
+ serverengine (2.0.5)
37
39
  sigdump (~> 0.2.2)
38
40
  sigdump (0.2.4)
39
- strptime (0.1.8)
40
- thread_safe (0.3.5)
41
- tzinfo (1.2.2)
41
+ strptime (0.1.9)
42
+ thread_safe (0.3.6)
43
+ tzinfo (1.2.3)
42
44
  thread_safe (~> 0.1)
43
- tzinfo-data (1.2016.6)
45
+ tzinfo-data (1.2017.2)
44
46
  tzinfo (>= 1.0.0)
45
- yajl-ruby (1.2.1)
47
+ yajl-ruby (1.3.0)
46
48
 
47
49
  PLATFORMS
48
50
  ruby
@@ -52,4 +54,4 @@ DEPENDENCIES
52
54
  rake
53
55
 
54
56
  BUNDLED WITH
55
- 1.12.5
57
+ 1.14.6
data/README.md CHANGED
@@ -45,6 +45,7 @@ S3 Event Example Intake
45
45
  # this plugin!
46
46
  <match sqs.s3.event>
47
47
  type s3_input
48
+ merge_record no
48
49
  s3_bucket_key s3_bucket
49
50
  s3_object_key_key s3_object
50
51
  uncompress gzip
@@ -57,3 +58,13 @@ S3 Event Example Intake
57
58
  split_key Records
58
59
  tag cloudtrail
59
60
  </match>
61
+ # params
62
+ tag my.new.tag : tag name to emit new record as
63
+ uncompress gzip : decompression algorithm (only gzip:-/)
64
+ s3_bucket_key my_s3_bucket : The name of your S3 bucket
65
+ s3_object_key_key /some/cool/object : The path to your S3 object
66
+ merge_record yes|no : Do we merge or replace the input record
67
+ remove_keys key1, key2 : keys that we remove after reading the s3 object
68
+ compression_exts gz, zip : extensions that we uncompress. Allows you to ingest both compressed and uncompressed files
69
+ record_key : if set, the record will be placed in this key
70
+
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-s3-input"
7
- spec.version = "0.0.1"
7
+ spec.version = "0.0.11"
8
8
  spec.authors = ["Anthony Johnson"]
9
9
  spec.email = ["ansoni@gmail.com"]
10
10
  spec.description = %q{Fluentd plugin to read a file from S3 and emit it}
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency "fluentd"
22
22
  spec.add_runtime_dependency "aws-sdk"
23
23
  spec.add_runtime_dependency "oj"
24
+ spec.add_runtime_dependency "rubyzip"
24
25
  end
@@ -1,3 +1,6 @@
1
+ require 'csv'
2
+ require 'json'
3
+ require 'zip'
1
4
 
2
5
  module Fluent
3
6
  class S3InputOutput < Output
@@ -14,8 +17,14 @@ module Fluent
14
17
  config_param :s3_bucket_key
15
18
  config_param :s3_object_key_key
16
19
  config_param :tag
17
- # supports: gzip
18
- config_param :uncompress, :string
20
+ config_param :merge_record, :bool, :default => false
21
+ config_param :record_key, :string, :default => nil
22
+ config_param :remove_keys, :array, :default => []
23
+ config_param :time_keys, :array, :default => []
24
+ config_param :time_format, :string, :default => "%Y-%m-%dT%H:%M:%S"
25
+ config_param :gzip_exts, :array, :default => []
26
+ config_param :zip_exts, :array, :default => []
27
+ config_param :format, :string, :default => 'json'
19
28
 
20
29
  attr_accessor :s3
21
30
 
@@ -28,6 +37,7 @@ module Fluent
28
37
 
29
38
  def configure(conf)
30
39
  super
40
+
31
41
  if @aws_key_id and @aws_sec_key
32
42
  @s3 = Aws::S3::Client.new(
33
43
  region: "us-east-1",
@@ -35,27 +45,86 @@ module Fluent
35
45
  secret_access_key: @aws_sec_key,
36
46
  )
37
47
  else
38
- @s3 = Aws::S3::Client.new(region: "us-east-1")
48
+ @s3 = Aws::S3::Client.new()
49
+ end
50
+ end
51
+
52
+ # Allow JSON data in a couple of formats
53
+ # {} single event
54
+ # [{},{}] array of events
55
+ # {}\n{}\n{} concatenated events (flume)
56
+ def normalize_json(json)
57
+ if json[0] != "["
58
+ json=json.gsub /}\n{/,"},{"
59
+ json="[#{json}]"
39
60
  end
61
+ json
40
62
  end
41
63
 
42
64
  def emit(tag, es, chain)
43
- tag_parts = tag.split('.')
44
- es.each { |time, record|
45
- s3_bucket = record[s3_bucket_key]
46
- s3_key = record[s3_object_key_key]
47
- resp = s3.get_object(bucket: s3_bucket, key: s3_key)
48
- if @uncompress && @uncompress == "gzip"
49
- input = Zlib::GzipReader.new(resp.body)
50
- else
51
- input = resp.body
52
- end
53
- new_record = Oj.load(input.read)
54
- router.emit(@tag, time, new_record)
55
- }
56
- chain.next
57
- rescue => e
58
- $log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
65
+ begin
66
+ tag_parts = tag.split('.')
67
+ es.each { |time, record|
68
+ s3_bucket = record[s3_bucket_key]
69
+ s3_key = record[s3_object_key_key]
70
+ s3_key_ext = s3_key.split(".")[-1]
71
+ resp = s3.get_object(bucket: s3_bucket, key: s3_key)
72
+
73
+ if @gzip_exts.include?(s3_key_ext)
74
+ input = Zlib::GzipReader.new(resp.body)
75
+ elsif @zip_exts.include?(s3_key_ext)
76
+ io = Zip::InputStream.new(resp.body)
77
+ input = io.get_next_entry
78
+ #input = Zip::File.open(resp.body).entries.first.get_input_stream
79
+ else
80
+ input = resp.body
81
+ end
82
+
83
+ new_record = {}
84
+ if @merge_record
85
+ new_record = {}.merge(record)
86
+ end
87
+
88
+ s3_record = {}
89
+ if @format == 'json'
90
+ json_data=normalize_json input.read
91
+ s3_record = Oj.load(json_data)
92
+ elsif @format == 'csv'
93
+ data = input.read
94
+ File.open("/tmp/s3debug", 'w') { |file| file.write(data) }
95
+ s3_record=CSV.parse(data).to_json
96
+ else
97
+ raise "Unsupported format - #{@format}"
98
+ end
99
+
100
+ # parse the time from the record
101
+ @time_keys.each do |time_key|
102
+ puts "Look for #{time_key} in #{new_record}"
103
+ if s3_record.include? time_key
104
+ puts "Reset time for #{time_key}"
105
+ time=Time.strptime(new_record[time_key], @time_format).to_i
106
+ puts "Setting time to #{time}"
107
+ break
108
+ end
109
+ end
110
+
111
+ if @record_key == nil
112
+ tmp_record=s3_record.merge(new_record)
113
+ new_record=tmp_record
114
+ else
115
+ new_record[record_key]=s3_record
116
+ end
117
+
118
+ @remove_keys.each do |key_to_remove|
119
+ new_record.delete(key_to_remove)
120
+ end
121
+
122
+ router.emit(@tag, time, new_record)
123
+ }
124
+ chain.next
125
+ rescue StandardError => e
126
+ $log.warn "s3_input: #{e.class} #{e.message} #{e.backtrace.join(', ')}"
127
+ end
59
128
  end
60
129
  end
61
130
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3-input
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Anthony Johnson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-18 00:00:00.000000000 Z
11
+ date: 2017-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description: Fluentd plugin to read a file from S3 and emit it
70
84
  email:
71
85
  - ansoni@gmail.com