embulk-parser-mysqldump_tab 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 540119b437ab3189dafba2b577bd0ef444c72583
4
+ data.tar.gz: ac15037cbdaab0a87a50a392837f28b14e742e65
5
+ SHA512:
6
+ metadata.gz: a5f756f601edcb1bd9f2eec7845cb9775b165c1c65ce99426160542d24f0b19ee604487364bdbe78614c7c6bfb129ba045ce211780697ac7eda13df0ead88653
7
+ data.tar.gz: 53ea4300b9ed686a312d4ce8a8da9d82a843c14eeaaa59274a669d12c209310fe0399088ec43126c87c264e5ba289f1ef4b78518d02dc3c5a73e578415c497ca
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Mysqldump Tab parser plugin for Embulk
2
+
3
+ Embulk parser plugin for data files produced by mysqldump with the `--tab` option.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
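+ - **columns**: output columns; each entry has a `name`, a `type` and an optional `format` (array, required)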
13
+ ## Example
14
+
15
+ ```yaml
16
+ in:
17
+ type: file
18
+ path_prefix: /path/to/dump/users.txt
19
+ parser:
20
+ type: mysqldump_tab
21
+ columns:
22
+ - {name: id, type: long}
23
+ - {name: name, type: string}
24
+ - {name: email, type: string}
25
+ out:
26
+ type: stdout
27
+ ```
28
+
29
+
30
+ ```
31
+ $ embulk gem install embulk-parser-mysqldump_tab
32
+ $ embulk guess -g mysqldump_tab config.yml -o guessed.yml
33
+ ```
34
+
35
+ ## Build
36
+
37
+ ```
38
+ $ rake
39
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
data/embulk-parser-mysqldump_tab.gemspec ADDED
@@ -0,0 +1,19 @@
# Gem specification for the embulk-parser-mysqldump_tab plugin.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "embulk-parser-mysqldump_tab"
  gem.version = "0.1.0"

  # Human-readable metadata
  gem.summary     = "Mysqldump Tab parser plugin for Embulk"
  gem.description = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
  gem.homepage    = "https://github.com/inouet/embulk-parser-mysqldump_tab"
  gem.licenses    = ["MIT"]
  gem.authors     = ["inouet"]
  gem.email       = ["inudog@gmail.com"]

  # Packaged files: everything tracked by git plus any jars under classpath/.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir["classpath/*.jar"]
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # No runtime dependencies are declared. Template for adding one:
  #   gem.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']

  # Development-only dependencies and their minimum versions.
  {
    'embulk'  => '>= 0.8.23',
    'bundler' => '>= 1.10.6',
    'rake'    => '>= 10.0'
  }.each do |dependency, minimum|
    gem.add_development_dependency dependency, [minimum]
  end
end
data/lib/embulk/guess/mysqldump_tab.rb ADDED
@@ -0,0 +1,63 @@
module Embulk
  module Guess

    # Placeholder for a guess plugin. Nothing is registered yet, so this
    # command has no "mysqldump_tab" guesser to call:
    #
    #   $ embulk guess -g "mysqldump_tab" partial-config.yml
    #
    # TODO: implement it by enabling ONE of the templates below. Choose the
    # base class that fits the file format: GuessPlugin (binary guess),
    # TextGuessPlugin (text guess) or LineGuessPlugin (line guess).

    # require "embulk/parser/mysqldump_tab.rb"

    # --- Template 1: binary guess -------------------------------------
    #
    # class MysqldumpTab < GuessPlugin
    #   Plugin.register_guess("mysqldump_tab", self)
    #
    #   def guess(config, sample_buffer)
    #     if sample_buffer[0,2] == GZIP_HEADER
    #       guessed = {}
    #       guessed["type"] = "mysqldump_tab"
    #       guessed["property1"] = "guessed-value"
    #       return {"parser" => guessed}
    #     else
    #       return {}
    #     end
    #   end
    # end

    # --- Template 2: text guess ---------------------------------------
    #
    # class MysqldumpTab < TextGuessPlugin
    #   Plugin.register_guess("mysqldump_tab", self)
    #
    #   def guess_text(config, sample_text)
    #     js = JSON.parse(sample_text) rescue nil
    #     if js && js["mykeyword"] == "keyword"
    #       guessed = {}
    #       guessed["type"] = "mysqldump_tab"
    #       guessed["property1"] = "guessed-value"
    #       return {"parser" => guessed}
    #     else
    #       return {}
    #     end
    #   end
    # end

    # --- Template 3: line guess ---------------------------------------
    #
    # class MysqldumpTab < LineGuessPlugin
    #   Plugin.register_guess("mysqldump_tab", self)
    #
    #   def guess_lines(config, sample_lines)
    #     all_line_matched = sample_lines.all? do |line|
    #       line =~ /mypattern/
    #     end
    #     if all_line_matched
    #       guessed = {}
    #       guessed["type"] = "mysqldump_tab"
    #       guessed["property1"] = "guessed-value"
    #       return {"parser" => guessed}
    #     else
    #       return {}
    #     end
    #   end
    # end

  end
end
data/lib/embulk/parser/mysqldump_tab.rb ADDED
@@ -0,0 +1,105 @@
module Embulk
  module Parser

    # Parser plugin, registered as "mysqldump_tab", that turns the lines of a
    # tab-separated dump into Embulk records.
    #
    # The format is fixed by the constants below: fields are separated by
    # FIELDS_TERMINATED_BY, FIELDS_ESCAPED_BY escapes a field separator or a
    # line break inside a value, and a physical line that ends with an
    # unescaped escape character continues on the next line.
    #
    # NOTE(review): apart from escaped tabs and escaped line breaks, escape
    # sequences are passed through verbatim (nothing in this class decodes
    # them) -- confirm whether that is acceptable for your data.
    class MysqldumpTab < ParserPlugin

      # Placeholder swapped in for escaped tabs while a line is being split.
      # A value that already contains this character comes back as a tab.
      DUMMY_STRING = "\v".freeze
      FIELDS_TERMINATED_BY = "\t".freeze
      FIELDS_ESCAPED_BY = '\\'.freeze
      FIELDS_ENCLOSED_BY = ''.freeze
      LINES_TERMINATED_BY = "\n".freeze

      # Escape character immediately followed by the field separator.
      ESCAPED_FIELD_TERMINATOR =
        /#{Regexp.escape(FIELDS_ESCAPED_BY + FIELDS_TERMINATED_BY)}/

      # Run of escape characters at the very end of a line.
      TRAILING_ESCAPES = /(?:#{Regexp.escape(FIELDS_ESCAPED_BY)})+\z/

      Plugin.register_parser("mysqldump_tab", self)

      # Builds the task and the output schema from the plugin configuration.
      #
      # @param config the parser configuration; must provide a `columns`
      #   array whose entries have "name", "type" and optionally "format"
      # @yield [task, columns] the task hash and the list of Column objects
      def self.transaction(config, &control)
        parser_task = config.load_config(Java::LineDecoder::DecoderTask)

        task = {
          "decoder_task" => DataSource.from_java(parser_task.dump)
        }

        # Approach borrowed from:
        # https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
        attributes = {}
        columns = config.param(:columns, :array).map do |column|
          name = column["name"]
          type = column["type"].to_sym
          attributes[name] = type
          Column.new(nil, name, type, column["format"])
        end

        # parse_line reads this to learn how many values a record must have.
        task[:attributes] = attributes

        yield(task, columns)
      end

      # Restores the line-decoder settings stored by .transaction.
      def init
        @decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
      end

      # Reads every file of the input line by line, joins continuation lines
      # into one logical record and adds each record to the page builder.
      #
      # @param file_input the Embulk file input handed to the parser
      def run(file_input)
        decoder = Java::LineDecoder.new(file_input.instance_eval { @java_file_input }, @decoder_task)

        while decoder.nextFile
          buffer = ''
          while (line = decoder.poll)
            buffer += line
            if in_column?(line)
              # The value continues on the next line: swap ONLY the trailing
              # escape character for the line break it stood for. Escapes
              # earlier in the record (e.g. in front of a tab) must survive
              # so that parse_line can still see them.
              buffer = buffer.chomp(FIELDS_ESCAPED_BY) + LINES_TERMINATED_BY
              next
            end
            page_builder.add(parse_line(buffer))
            buffer = ''
          end

          # The file ended in the middle of a continued record: emit what we have.
          page_builder.add(parse_line(buffer)) unless buffer.empty?
        end

        page_builder.finish
      end

      # Splits one logical record into its column values.
      #
      # @param line [String] a complete record (continuation lines already joined)
      # @return [Array] exactly as many values as there are configured
      #   columns; missing trailing values are nil, surplus values are dropped
      def parse_line(line)
        # Hide escaped tabs behind the placeholder so they are not split on.
        protected_line = line.gsub(ESCAPED_FIELD_TERMINATOR, DUMMY_STRING)

        cols = protected_line.split(FIELDS_TERMINATED_BY)
        cols.map! { |item| item.gsub(DUMMY_STRING, FIELDS_TERMINATED_BY) }

        adjust_column(cols, task[:attributes].length)
      end

      # Tells whether +line+ ends inside a value, i.e. with an escaped line break.
      #
      # Only an odd number of trailing escape characters counts: an even run
      # is made of escaped escape characters, which do not escape the line break.
      #
      # @param line [String] one physical line
      # @return [Boolean]
      def in_column?(line)
        trailing = line[TRAILING_ESCAPES]
        return false if trailing.nil?

        (trailing.length / FIELDS_ESCAPED_BY.length).odd?
      end

      # Returns a copy of +arr+ with exactly +len+ elements.
      #
      # @param arr [Array] the values to fit
      # @param len [Integer] the wanted length
      # @return [Array] +arr+ truncated to +len+ or padded with nil
      def adjust_column(arr, len)
        Array.new(len) { |i| arr[i] }
      end

    end

  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-mysqldump_tab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - inouet
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-10-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.23
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.23
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description: Embulk parser plugin for mysqldump file that dumped with the --tab option.
56
+ email:
57
+ - inudog@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - .ruby-version
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - embulk-parser-mysqldump_tab.gemspec
69
+ - lib/embulk/guess/mysqldump_tab.rb
70
+ - lib/embulk/parser/mysqldump_tab.rb
71
+ homepage: https://github.com/inouet/embulk-parser-mysqldump_tab
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - '>='
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.0.14.1
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Mysqldump Tab parser plugin for Embulk
95
+ test_files: []