embulk-parser-mysqldump_tab 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 540119b437ab3189dafba2b577bd0ef444c72583
4
+ data.tar.gz: ac15037cbdaab0a87a50a392837f28b14e742e65
5
+ SHA512:
6
+ metadata.gz: a5f756f601edcb1bd9f2eec7845cb9775b165c1c65ce99426160542d24f0b19ee604487364bdbe78614c7c6bfb129ba045ce211780697ac7eda13df0ead88653
7
+ data.tar.gz: 53ea4300b9ed686a312d4ce8a8da9d82a843c14eeaaa59274a669d12c209310fe0399088ec43126c87c264e5ba289f1ef4b78518d02dc3c5a73e578415c497ca
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Mysqldump Tab parser plugin for Embulk
2
+
3
+ Embulk parser plugin for files that were dumped by mysqldump with the `--tab` option.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
+
13
+ ## Example
14
+
15
+ ```yaml
16
+ in:
17
+ type: file
18
+ path_prefix: /path/to/dump/users.txt
19
+ parser:
20
+ type: mysqldump_tab
21
+ columns:
22
+ - {name: id, type: long}
23
+ - {name: name, type: string}
24
+ - {name: email, type: string}
25
+ out:
26
+ type: stdout
27
+ ```
28
+
29
+
30
+ ```
31
+ $ embulk gem install embulk-parser-mysqldump_tab
32
+ $ embulk guess -g mysqldump_tab config.yml -o guessed.yml
33
+ ```
34
+
35
+ ## Build
36
+
37
+ ```
38
+ $ rake
39
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,19 @@
1
+
2
# Gem specification for the embulk-parser-mysqldump_tab plugin.
Gem::Specification.new do |spec|
  spec.name          = "embulk-parser-mysqldump_tab"
  spec.version       = "0.1.0"
  spec.authors       = ["inouet"]
  spec.summary       = "Mysqldump Tab parser plugin for Embulk"
  spec.description   = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
  spec.email         = ["inudog@gmail.com"]
  spec.licenses      = ["MIT"]
  spec.homepage      = "https://github.com/inouet/embulk-parser-mysqldump_tab"

  # `-z` makes git emit NUL-terminated, unquoted paths, so file names that git
  # would otherwise quote (or that contain a newline) are listed verbatim.
  spec.files         = `git ls-files -z`.split("\x0") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency 'embulk', ['>= 0.8.23']
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
@@ -0,0 +1,63 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "mysqldump_tab" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ # require "embulk/parser/mysqldump_tab.rb"
12
+
13
+ # class MysqldumpTab < GuessPlugin
14
+ # Plugin.register_guess("mysqldump_tab", self)
15
+ #
16
+ # def guess(config, sample_buffer)
17
+ # if sample_buffer[0,2] == GZIP_HEADER
18
+ # guessed = {}
19
+ # guessed["type"] = "mysqldump_tab"
20
+ # guessed["property1"] = "guessed-value"
21
+ # return {"parser" => guessed}
22
+ # else
23
+ # return {}
24
+ # end
25
+ # end
26
+ # end
27
+
28
+ # class MysqldumpTab < TextGuessPlugin
29
+ # Plugin.register_guess("mysqldump_tab", self)
30
+ #
31
+ # def guess_text(config, sample_text)
32
+ # js = JSON.parse(sample_text) rescue nil
33
+ # if js && js["mykeyword"] == "keyword"
34
+ # guessed = {}
35
+ # guessed["type"] = "mysqldump_tab"
36
+ # guessed["property1"] = "guessed-value"
37
+ # return {"parser" => guessed}
38
+ # else
39
+ # return {}
40
+ # end
41
+ # end
42
+ # end
43
+
44
+ # class MysqldumpTab < LineGuessPlugin
45
+ # Plugin.register_guess("mysqldump_tab", self)
46
+ #
47
+ # def guess_lines(config, sample_lines)
48
+ # all_line_matched = sample_lines.all? do |line|
49
+ # line =~ /mypattern/
50
+ # end
51
+ # if all_line_matched
52
+ # guessed = {}
53
+ # guessed["type"] = "mysqldump_tab"
54
+ # guessed["property1"] = "guessed-value"
55
+ # return {"parser" => guessed}
56
+ # else
57
+ # return {}
58
+ # end
59
+ # end
60
+ # end
61
+
62
+ end
63
+ end
@@ -0,0 +1,105 @@
1
+ module Embulk
2
+ module Parser
3
+
4
+ class MysqldumpTab < ParserPlugin
5
+
6
# Format characters used while reading a dump. All values are frozen so the
# shared constants cannot be mutated by accident.

# Placeholder character (vertical tab) available for temporary substitution
# while a record is being split.
DUMMY_STRING = "\v".freeze
# Character that separates fields within a record.
FIELDS_TERMINATED_BY = "\t".freeze
# Character that escapes the following character (a single backslash).
FIELDS_ESCAPED_BY = '\\'.freeze
# Field enclosure character; empty, i.e. fields are not enclosed.
FIELDS_ENCLOSED_BY = ''.freeze
# Character that terminates a record.
LINES_TERMINATED_BY = "\n".freeze
11
+
12
+ Plugin.register_parser("mysqldump_tab", self)
13
+
14
# Builds the task hash and the column list from the parser configuration and
# yields both to the given block.
#
# config  - parser configuration; its :columns array entries are read through
#           the "name", "type" and "format" keys.
# control - block that receives (task, columns); it is invoked via yield.
#
# Returns whatever the block returns.
def self.transaction(config, &control)
  decoder_config = config.load_config(Java::LineDecoder::DecoderTask)
  task = { "decoder_task" => DataSource.from_java(decoder_config.dump) }

  # Column handling modelled on
  # https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
  type_by_name = {}
  schema = config.param(:columns, :array).map do |definition|
    column_name = definition["name"]
    column_type = definition["type"].to_sym
    type_by_name[column_name] = column_type
    Column.new(nil, column_name, column_type, definition["format"])
  end

  # The name => type map travels with the task; parse_line reads its size.
  task[:attributes] = type_by_name

  yield(task, schema)
end
43
+
44
# Loads the decoder task stored under "decoder_task" in the task and keeps it
# in @decoder_task, where run picks it up to build the line decoder.
def init
  stored_decoder_task = task.param("decoder_task", :hash)
  @decoder_task = stored_decoder_task.load_task(Java::LineDecoder::DecoderTask)
end
52
+
53
# Reads every file of the input line by line, joins physical lines that
# belong to one record, and adds each parsed record to page_builder.
#
# A line for which in_column? is true ends inside a column value (escaped
# line break), so the following line is appended to the same record.
#
# file_input - file input object; its @java_file_input is handed to the
#              Java line decoder.
def run(file_input)
  decoder = Java::LineDecoder.new(file_input.instance_eval { @java_file_input }, @decoder_task)

  while decoder.nextFile
    buffer = ''.dup
    while (line = decoder.poll)
      buffer << line
      if in_column?(line)
        # Swap ONLY the trailing escape character for the line terminator.
        # Replacing every escape character in the buffer would destroy the
        # escaped separators and backslashes that parse_line still needs.
        # (Assumes the decoder returns lines without their terminator.)
        buffer = buffer.chomp(FIELDS_ESCAPED_BY) + LINES_TERMINATED_BY
        next
      end
      page_builder.add(parse_line(buffer))
      buffer = ''.dup
    end

    # The file ended in the middle of a record: emit what was collected.
    page_builder.add(parse_line(buffer)) unless buffer.empty?
  end

  page_builder.finish
end
78
+
79
# Splits one record into its column values.
#
# The record is scanned character by character so that an escape character
# always consumes the character that follows it:
#   * escape + separator becomes a literal separator inside the field;
#   * any other escape pair (e.g. an escaped backslash) is kept verbatim, and
#     the character after the pair is interpreted normally, so an escaped
#     backslash directly before a separator no longer merges two fields;
#   * an unescaped separator starts a new field.
# Trailing empty fields are preserved as empty strings. No placeholder
# character is substituted, so vertical tabs in the data are left untouched.
#
# NOTE(review): a field consisting of escape + "N" is passed through verbatim;
# if that is meant to be a NULL marker it still needs mapping to nil - confirm.
# Assumes FIELDS_TERMINATED_BY and FIELDS_ESCAPED_BY are single characters.
#
# line - the complete record as a String.
#
# Returns an Array with exactly task[:attributes].length entries (see
# adjust_column).
def parse_line(line)
  separator = FIELDS_TERMINATED_BY
  escape = FIELDS_ESCAPED_BY

  fields = [''.dup]
  escaped = false
  line.each_char do |char|
    if escaped
      fields.last << (char == separator ? separator : escape + char)
      escaped = false
    elsif char == escape
      escaped = true
    elsif char == separator
      fields << ''.dup
    else
      fields.last << char
    end
  end
  # A dangling escape at the very end of the record is kept as-is.
  fields.last << escape if escaped

  adjust_column(fields, task[:attributes].length)
end
91
+
92
# Tells whether a physical line ends inside a column value, i.e. whether its
# line break was escaped and the record continues on the next line.
#
# Only an ODD number of trailing escape characters escapes the line break; an
# even run consists of escaped escape characters (literal backslashes at the
# end of the last field) and terminates the record normally.
#
# line - one line as returned by the decoder (nil yields false).
#
# Returns true or false.
def in_column?(line)
  trailing = /(?:#{Regexp.escape(FIELDS_ESCAPED_BY)})*\Z/.match(line)
  return false unless trailing

  (trailing[0].length / FIELDS_ESCAPED_BY.length).odd?
end
95
+
96
# Returns a new array holding exactly len entries taken from arr: surplus
# entries are cut off, missing ones are padded with nil. arr itself is not
# modified.
def adjust_column(arr, len)
  sized = arr.slice(0, len)                    # at most len leading entries
  sized.concat(Array.new(len - sized.length))  # pad the remainder with nil
end
101
+
102
+ end
103
+
104
+ end
105
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-mysqldump_tab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - inouet
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-10-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.23
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.23
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description: Embulk parser plugin for mysqldump file that dumped with the --tab option.
56
+ email:
57
+ - inudog@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - .ruby-version
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - embulk-parser-mysqldump_tab.gemspec
69
+ - lib/embulk/guess/mysqldump_tab.rb
70
+ - lib/embulk/parser/mysqldump_tab.rb
71
+ homepage: https://github.com/inouet/embulk-parser-mysqldump_tab
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - '>='
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.0.14.1
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Mysqldump Tab parser plugin for Embulk
95
+ test_files: []