embulk-parser-unpack 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 650827aaf0ff16b4ef922a62b7de00f80ad4ce9f
4
+ data.tar.gz: eb56b59f391e675e386d1b5051a9f37358a871b3
5
+ SHA512:
6
+ metadata.gz: 6c9131cb80a84d918b854db39a01ae0fc7a9c3cdb9992c8d170e498adaba42621880b102280491e8c0286eb9e61ee09f3b51dd12136864a86bb47a7cc97735e6
7
+ data.tar.gz: 398f7585994badbadd0d5f3f7c1702d5ab3752f18561e18c500278b1e48ef5de01b28efc659f6de4232bde2fd1aaff091bb540b89587eea6ebf383f6fd7a5183
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2016 Karri Niemelä
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # Unpack parser plugin for Embulk
2
+
3
+ Parses each line of the input with Ruby's `String#unpack`, using a user-supplied `format` string, and emits the unpacked values as one record per line.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
+ - **format**: `String#unpack` format directives applied to every line (string, required)
13
+ - **columns**: output columns, each given as a `name` and a `type` (array, default: `[]`)
14
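+ - Line-decoder settings (e.g. character set and newline handling) are taken from Embulk's built-in `LineDecoder` task; see the Embulk documentation for the exact option names.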
15
+
16
+ ## Example
17
+
18
+ ```yaml
19
+ in:
20
+ type: any file input plugin type
21
+ parser:
22
+ type: unpack
23
+ format: "A10A5"
24
+ columns: [{name: id, type: string}, {name: code, type: string}]
25
+ ```
26
+
27
+ (If guess supported) you don't have to write `parser:` section in the configuration file. After writing `in:` section, you can let embulk guess `parser:` section using this command:
28
+
29
+ ```
30
+ $ embulk gem install embulk-parser-unpack
31
+ $ embulk guess -g unpack config.yml -o guessed.yml
32
+ ```
33
+
34
+ ## Build
35
+
36
+ ```
37
+ $ rake
38
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,19 @@
1
+
2
# Gem specification for the embulk-parser-unpack plugin.
Gem::Specification.new do |s|
  # Identity and descriptive metadata.
  s.name        = "embulk-parser-unpack"
  s.version     = "0.1.0"
  s.summary     = "Unpack parser plugin for Embulk"
  s.description = "Parses files read by other file input plugins."
  s.homepage    = "https://github.com/kakoni/embulk-parser-unpack"
  s.licenses    = ["MIT"]
  s.authors     = ["Karri Niemel\u{e4}"]
  s.email       = ["kakoni@gmail.com"]

  # Package every file tracked by git, plus any jars found under classpath/.
  tracked_files   = `git ls-files`.split("\n")
  s.files         = tracked_files + Dir["classpath/*.jar"]
  # Test files are the packaged files that live under test/ or spec/.
  s.test_files    = s.files.grep(%r{^(test|spec)/})
  s.require_paths = ["lib"]

  # Development-only dependencies, declared in their original order.
  {
    'embulk'  => '>= 0.8.9',
    'bundler' => '>= 1.10.6',
    'rake'    => '>= 10.0',
  }.each do |dependency, requirement|
    s.add_development_dependency dependency, [requirement]
  end
end
@@ -0,0 +1,63 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "unpack" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ #require "embulk/parser/unpack.rb"
12
+
13
+ #class Unpack < GuessPlugin
14
+ # Plugin.register_guess("unpack", self)
15
+ #
16
+ # def guess(config, sample_buffer)
17
+ # if sample_buffer[0,2] == GZIP_HEADER
18
+ # guessed = {}
19
+ # guessed["type"] = "unpack"
20
+ # guessed["property1"] = "guessed-value"
21
+ # return {"parser" => guessed}
22
+ # else
23
+ # return {}
24
+ # end
25
+ # end
26
+ #end
27
+
28
+ #class Unpack < TextGuessPlugin
29
+ # Plugin.register_guess("unpack", self)
30
+ #
31
+ # def guess_text(config, sample_text)
32
+ # js = JSON.parse(sample_text) rescue nil
33
+ # if js && js["mykeyword"] == "keyword"
34
+ # guessed = {}
35
+ # guessed["type"] = "unpack"
36
+ # guessed["property1"] = "guessed-value"
37
+ # return {"parser" => guessed}
38
+ # else
39
+ # return {}
40
+ # end
41
+ # end
42
+ #end
43
+
44
+ #class Unpack < LineGuessPlugin
45
+ # Plugin.register_guess("unpack", self)
46
+ #
47
+ # def guess_lines(config, sample_lines)
48
+ # all_line_matched = sample_lines.all? do |line|
49
+ # line =~ /mypattern/
50
+ # end
51
+ # if all_line_matched
52
+ # guessed = {}
53
+ # guessed["type"] = "unpack"
54
+ # guessed["property1"] = "guessed-value"
55
+ # return {"parser" => guessed}
56
+ # else
57
+ # return {}
58
+ # end
59
+ # end
60
+ #end
61
+
62
+ end
63
+ end
@@ -0,0 +1,56 @@
1
module Embulk
  module Parser

    # Parser plugin registered under the name "unpack".
    #
    # Each line produced by Embulk's Java LineDecoder is passed through
    # String#unpack with the configured "format" string, and the resulting
    # array of values is added to the page builder as one record.
    class Unpack < ParserPlugin
      Plugin.register_parser("unpack", self)

      # Builds the task and the output columns from the plugin configuration
      # and yields both to the caller's block.
      #
      # config  - plugin configuration; reads "format" (string, no default)
      #           and "columns" (array, default: []). Each column entry is
      #           indexed with the string keys "name" and "type".
      # control - block that receives (task, columns).
      #
      # Returns whatever the block returns.
      def self.transaction(config, &control)
        line_decoder_task = config.load_config(Java::LineDecoder::DecoderTask)

        # The decoder settings are dumped into the task so that #init can
        # rebuild the DecoderTask from it later.
        task = {
          "decoder" => DataSource.from_java(line_decoder_task.dump),
          "format" => config.param("format", :string),
        }

        # One Column per configured entry; the index is passed as nil, exactly
        # as the entries are declared.
        columns = config.param("columns", :array, default: []).map do |definition|
          Column.new(nil, definition["name"], definition["type"].to_sym)
        end

        yield(task, columns)
      end

      # Restores per-task state: the unpack format string and the line-decoder
      # task that was stored in the task by .transaction.
      def init
        @format = task["format"]
        @decoder = task.param("decoder", :hash).load_task(Java::LineDecoder::DecoderTask)
      end

      # Reads every line of every file in file_input, converts each one into
      # a record, and finishes the page builder once all input is consumed.
      #
      # file_input - Embulk file input; converted with #to_java for LineDecoder.
      def run(file_input)
        line_decoder = Java::LineDecoder.new(file_input.to_java, @decoder)

        while line_decoder.nextFile
          # poll is used as the loop condition: a falsy result ends the file.
          while (current_line = line_decoder.poll)
            process_line(current_line)
          end
        end

        page_builder.finish
      end

      private

      # Unpacks a single line with the configured format and appends the
      # resulting values to the page builder as one record.
      def process_line(line)
        page_builder.add(line.unpack(@format))
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-unpack
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Karri Niemelä
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-09-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 0.8.9
19
+ name: embulk
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.9
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.10.6
33
+ name: bundler
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '10.0'
47
+ name: rake
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description: Parses files read by other file input plugins.
56
+ email:
57
+ - kakoni@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - Gemfile
63
+ - LICENSE
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-parser-unpack.gemspec
68
+ - lib/embulk/guess/unpack.rb
69
+ - lib/embulk/parser/unpack.rb
70
+ homepage: https://github.com/kakoni/embulk-parser-unpack
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.4.8
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Unpack parser plugin for Embulk
94
+ test_files: []