embulk-parser-fluent-s3-log 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1947e0f5662b7d5965953676f825e5e3d2c03527
4
+ data.tar.gz: 6458757eb463583a933bbdbfa2f014cae6428eb3
5
+ SHA512:
6
+ metadata.gz: 8b692e0290673e4237ed0c0954023a8c7770e8cccc634ec4c16547a51a0a22a8337de260c9e7dc795e6260b00b70b81b3f8469c82f51230805588fb3df9ffede
7
+ data.tar.gz: ae0a7ad2ba61393c8ad5b9186c32570bce32e01675c6f1ea2c3198bb4237bf8d042fca4161fee8761a9f9b3736d1fc4709cb0f268dd020e63dca9ef3e238f9ff
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Fluent S3 Log parser plugin for Embulk
2
+
3
+ This plugin parses fluent-plugin-s3's output log file.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
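+ - **columns**: columns to read from each record's JSON payload; each entry has a `name` and a `type` (`string`, `long`, `double`, `boolean` or `timestamp`), and `timestamp` columns also need a `format`. (array, required)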
13
+
14
+ ## Example
15
+
16
+ ```yaml
17
+ in:
18
+ type: any file input plugin type
19
+ parser:
20
+ type: fluent-s3-log
21
+ columns:
22
+ - {name: id, type: long}
23
+ - {name: path, type: string}
24
+ ```
25
+
26
+ <!-- (If guess supported) you don't have to write `parser:` section in the configuration file. After writing `in:` section, you can let embulk guess `parser:` section using this command:
27
+
28
+ ```
29
+ $ embulk install embulk-parser-fluent-s3-log
30
+ $ embulk guess -g fluent-s3-log config.yml -o guessed.yml
31
+ ``` -->
32
+
33
+ ## Build
34
+
35
+ ```
36
+ $ rake
37
+ ```
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,18 @@
1
+
2
# Gem specification for embulk-parser-fluent-s3-log.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name          = "embulk-parser-fluent-s3-log"
  gem.version       = "0.0.1"
  gem.authors       = ["y-matsuwitter"]
  gem.summary       = "Fluent S3 Log parser plugin for Embulk"
  gem.description   = "Parses Fluent S3 Log files read by other file input plugins."
  gem.email         = ["info@yuki-matsumoto.com"]
  gem.licenses      = ["MIT"]
  gem.homepage      = "https://github.com/y-matsuwitter/embulk-parser-fluent-s3-log"

  # Package every git-tracked file plus any jars found under classpath/.
  tracked_files     = `git ls-files`.split("\n")
  gem.files         = tracked_files + Dir["classpath/*.jar"]
  # Test files are whatever the packaged file list holds under test/ or spec/.
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Build-time tooling only; the gem declares no runtime dependencies.
  gem.add_development_dependency 'bundler', ['~> 1.0']
  gem.add_development_dependency 'rake', ['>= 10.0']
end
@@ -0,0 +1,63 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "fluent-s3-log" partial-config.yml
6
+ #
7
+     # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ #require "embulk/parser/fluent-s3-log.rb"
12
+
13
+ #class FluentS3LogParserGuessPlugin < GuessPlugin
14
+ # Plugin.register_guess("fluent-s3-log", self)
15
+ #
16
+ # def guess(config, sample_buffer)
17
+ # if sample_buffer[0,2] == GZIP_HEADER
18
+ # guessed = {}
19
+ # guessed["type"] = "fluent-s3-log"
20
+ # guessed["property1"] = "guessed-value"
21
+ # return {"parser" => guessed}
22
+ # else
23
+ # return {}
24
+ # end
25
+ # end
26
+ #end
27
+
28
+ #class FluentS3LogParserGuessPlugin < TextGuessPlugin
29
+ # Plugin.register_guess("fluent-s3-log", self)
30
+ #
31
+ # def guess_text(config, sample_text)
32
+ # js = JSON.parse(sample_text) rescue nil
33
+ # if js && js["mykeyword"] == "keyword"
34
+ # guessed = {}
35
+ # guessed["type"] = "fluent-s3-log"
36
+ # guessed["property1"] = "guessed-value"
37
+ # return {"parser" => guessed}
38
+ # else
39
+ # return {}
40
+ # end
41
+ # end
42
+ #end
43
+
44
+ #class FluentS3LogParserGuessPlugin < LineGuessPlugin
45
+ # Plugin.register_guess("fluent-s3-log", self)
46
+ #
47
+ # def guess_lines(config, sample_lines)
48
+ # all_line_matched = sample_lines.all? do |line|
49
+ # line =~ /mypattern/
50
+ # end
51
+ # if all_line_matched
52
+ # guessed = {}
53
+ # guessed["type"] = "fluent-s3-log"
54
+ # guessed["property1"] = "guessed-value"
55
+ # return {"parser" => guessed}
56
+ # else
57
+ # return {}
58
+ # end
59
+ # end
60
+ #end
61
+
62
+ end
63
+ end
@@ -0,0 +1,67 @@
1
+ require "json"
+ require "stringio"
+ require "time"
2
+
3
+ module Embulk
4
+ module Parser
5
+
6
+ class FluentS3LogParserPlugin < ParserPlugin
7
+ Plugin.register_parser("fluent-s3-log", self)
8
+
9
# Builds the task and the output schema, then yields both to the given block.
#
# The task carries the required "columns" array read from the parser config.
# The schema is a "time" timestamp column and a "key" string column followed
# by one column per configured entry.
#
# @param config parser configuration; must respond to param(name, type)
# @yield [task, columns] the task hash and the array of Column objects
# @return whatever the block returns
def self.transaction(config, &control)
  task = { :columns => config.param("columns", :array) }

  # Configured columns are numbered from 2 because indexes 0 and 1 belong to
  # the fixed "time" and "key" columns placed in front of them below.
  configured = task[:columns].map.with_index(2) do |col, idx|
    Column.new(idx, col["name"], col["type"].to_sym)
  end

  schema = [
    Column.new(0, "time", :timestamp),
    Column.new(1, "key", :string)
  ] + configured

  yield(task, schema)
end
22
+
23
# Parses every file supplied by file_input and adds one record per line to
# page_builder, then finishes the page builder.
#
# Each non-blank line is expected to hold three tab-separated fields: a
# timestamp in "%Y-%m-%dT%H:%M:%S%Z" form, a key, and a JSON object. The
# record is [time, key] followed by the values make_record extracts from the
# JSON object for the configured columns.
#
# @param file_input source of files; next_file returns the next file or a
#   falsy value when there are no more
# @raise [ArgumentError] when the timestamp field does not match the format
# @raise [JSON::ParserError] when the third field is not valid JSON
def run(file_input)
  while file = file_input.next_file
    StringIO.new(file.read).each_line do |line|
      # Blank lines (for example a trailing empty line) carry no record;
      # without this guard the timestamp parse would fail on a nil field.
      next if line.strip.empty?

      # Limit the split to three fields so that a tab inside the JSON payload
      # stays part of the payload instead of truncating it.
      time_text, key, payload = line.chomp.split("\t", 3)

      record = [
        Time.strptime(time_text, "%Y-%m-%dT%H:%M:%S%Z"),
        key
      ]
      record += make_record(@task["columns"], JSON.parse(payload))
      page_builder.add(record)
    end
  end
  page_builder.finish
end
39
+ private
40
+
41
# Extracts the configured column values from one parsed JSON object.
#
# A field that is missing or nil is treated as an empty string before
# conversion, so it yields "" for string, 0 for long, 0.0 for double,
# false for boolean and nil for timestamp.
#
# @param schema [Array<Hash>] column definitions with "name" and "type" keys,
#   plus a "format" key (strptime pattern) for timestamp columns
# @param e [Hash] parsed JSON object keyed by column name
# @return [Array] one converted value per schema entry, in schema order
# @raise [RuntimeError] when a column has an unsupported type
def make_record(schema, e)
  schema.map do |c|
    val = e[c["name"]]
    v = val.nil? ? "" : val
    type = c["type"]

    case type
    when "string"
      v
    when "long"
      v.to_i
    when "double"
      v.to_f
    when "boolean"
      # JSON booleans and numbers arrive as native values, not strings;
      # going through to_s lets them share the textual truthiness check.
      ["yes", "true", "1"].include?(v.to_s.downcase)
    when "timestamp"
      # to_s keeps the emptiness check from failing on non-string values.
      text = v.to_s
      text.empty? ? nil : Time.strptime(text, c["format"])
    else
      raise "Unsupported type #{type}"
    end
  end
end
64
+ end
65
+
66
+ end
67
+ end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-fluent-s3-log
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - y-matsuwitter
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Parses Fluent S3 Log files read by other file input plugins.
42
+ email:
43
+ - info@yuki-matsumoto.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - embulk-parser-fluent-s3-log.gemspec
54
+ - lib/embulk/guess/fluent-s3-log.rb
55
+ - lib/embulk/parser/fluent-s3-log.rb
56
+ homepage: https://github.com/y-matsuwitter/embulk-parser-fluent-s3-log
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.0.3
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Fluent S3 Log parser plugin for Embulk
80
+ test_files: []