embulk-parser-mysqldump_tab 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +3 -0
- data/embulk-parser-mysqldump_tab.gemspec +19 -0
- data/lib/embulk/guess/mysqldump_tab.rb +63 -0
- data/lib/embulk/parser/mysqldump_tab.rb +105 -0
- metadata +95 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 540119b437ab3189dafba2b577bd0ef444c72583
         | 
| 4 | 
            +
              data.tar.gz: ac15037cbdaab0a87a50a392837f28b14e742e65
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: a5f756f601edcb1bd9f2eec7845cb9775b165c1c65ce99426160542d24f0b19ee604487364bdbe78614c7c6bfb129ba045ce211780697ac7eda13df0ead88653
         | 
| 7 | 
            +
              data.tar.gz: 53ea4300b9ed686a312d4ce8a8da9d82a843c14eeaaa59274a669d12c209310fe0399088ec43126c87c264e5ba289f1ef4b78518d02dc3c5a73e578415c497ca
         | 
    
        data/.ruby-version
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            jruby-9.1.5.0
         | 
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE.txt
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            MIT License
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining
         | 
| 5 | 
            +
            a copy of this software and associated documentation files (the
         | 
| 6 | 
            +
            "Software"), to deal in the Software without restriction, including
         | 
| 7 | 
            +
            without limitation the rights to use, copy, modify, merge, publish,
         | 
| 8 | 
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         | 
| 9 | 
            +
            permit persons to whom the Software is furnished to do so, subject to
         | 
| 10 | 
            +
            the following conditions:
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            The above copyright notice and this permission notice shall be
         | 
| 13 | 
            +
            included in all copies or substantial portions of the Software.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         | 
| 16 | 
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         | 
| 17 | 
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         | 
| 18 | 
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         | 
| 19 | 
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         | 
| 20 | 
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         | 
| 21 | 
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         | 
    
        data/README.md
    ADDED
    
    | @@ -0,0 +1,39 @@ | |
| 1 | 
            +
            # Mysqldump Tab parser plugin for Embulk
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Embulk parser plugin for mysqldump files that were dumped with the --tab option.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ## Overview
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            * **Plugin type**: parser
         | 
| 8 | 
            +
            * **Guess supported**: no
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            ## Configuration
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            ## Example
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            ```yaml
         | 
| 16 | 
            +
            in:
         | 
| 17 | 
            +
              type: file
         | 
| 18 | 
            +
              path_prefix: /path/to/dump/users.txt
         | 
| 19 | 
            +
              parser:
         | 
| 20 | 
            +
                type: mysqldump_tab
         | 
| 21 | 
            +
                columns:
         | 
| 22 | 
            +
                - {name: id, type: long}
         | 
| 23 | 
            +
                - {name: name, type: string}
         | 
| 24 | 
            +
                - {name: email, type: string}
         | 
| 25 | 
            +
            out:
         | 
| 26 | 
            +
              type: stdout
         | 
| 27 | 
            +
            ```
         | 
| 28 | 
            +
             | 
| 29 | 
            +
             | 
| 30 | 
            +
            ```
         | 
| 31 | 
            +
            $ embulk gem install embulk-parser-mysqldump_tab
         | 
| 32 | 
            +
            $ embulk guess -g mysqldump_tab config.yml -o guessed.yml  # NOTE: guess is not implemented yet (see "Guess supported: no" above)
         | 
| 33 | 
            +
            ```
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            ## Build
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            ```
         | 
| 38 | 
            +
            $ rake
         | 
| 39 | 
            +
            ```
         | 
    
        data/Rakefile
    ADDED
    
    
| @@ -0,0 +1,19 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            Gem::Specification.new do |spec|
         | 
| 3 | 
            +
              spec.name          = "embulk-parser-mysqldump_tab"
         | 
| 4 | 
            +
              spec.version       = "0.1.0"
         | 
| 5 | 
            +
              spec.authors       = ["inouet"]
         | 
| 6 | 
            +
              spec.summary       = "Mysqldump Tab parser plugin for Embulk"
         | 
| 7 | 
            +
              spec.description   = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
         | 
| 8 | 
            +
              spec.email         = ["inudog@gmail.com"]
         | 
| 9 | 
            +
              spec.licenses      = ["MIT"]
         | 
| 10 | 
            +
              spec.homepage      = "https://github.com/inouet/embulk-parser-mysqldump_tab"
         | 
| 11 | 
            +
              spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
         | 
| 12 | 
            +
              spec.test_files    = spec.files.grep(%r{^(test|spec)/})
         | 
| 13 | 
            +
              spec.require_paths = ["lib"]
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
         | 
| 16 | 
            +
              spec.add_development_dependency 'embulk', ['>= 0.8.23']
         | 
| 17 | 
            +
              spec.add_development_dependency 'bundler', ['>= 1.10.6']
         | 
| 18 | 
            +
              spec.add_development_dependency 'rake', ['>= 10.0']
         | 
| 19 | 
            +
            end
         | 
| @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            module Embulk
         | 
| 2 | 
            +
              module Guess
         | 
| 3 | 
            +
             | 
| 4 | 
            +
                # TODO implement guess plugin to make this command work:
         | 
| 5 | 
            +
                #      $ embulk guess -g "mysqldump_tab" partial-config.yml
         | 
| 6 | 
            +
                #
         | 
| 7 | 
            +
                #      Depending on the file format the plugin uses, you can choose
         | 
| 8 | 
            +
                #      one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
         | 
| 9 | 
            +
                #      or line guess (LineGuessPlugin).
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                # require "embulk/parser/mysqldump_tab.rb"
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                # class MysqldumpTab < GuessPlugin
         | 
| 14 | 
            +
                #   Plugin.register_guess("mysqldump_tab", self)
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                #   def guess(config, sample_buffer)
         | 
| 17 | 
            +
                #     if sample_buffer[0,2] == GZIP_HEADER
         | 
| 18 | 
            +
                #       guessed = {}
         | 
| 19 | 
            +
                #       guessed["type"] = "mysqldump_tab"
         | 
| 20 | 
            +
                #       guessed["property1"] = "guessed-value"
         | 
| 21 | 
            +
                #       return {"parser" => guessed}
         | 
| 22 | 
            +
                #     else
         | 
| 23 | 
            +
                #       return {}
         | 
| 24 | 
            +
                #     end
         | 
| 25 | 
            +
                #   end
         | 
| 26 | 
            +
                # end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                # class MysqldumpTab < TextGuessPlugin
         | 
| 29 | 
            +
                #   Plugin.register_guess("mysqldump_tab", self)
         | 
| 30 | 
            +
                #
         | 
| 31 | 
            +
                #   def guess_text(config, sample_text)
         | 
| 32 | 
            +
                #     js = JSON.parse(sample_text) rescue nil
         | 
| 33 | 
            +
                #     if js && js["mykeyword"] == "keyword"
         | 
| 34 | 
            +
                #       guessed = {}
         | 
| 35 | 
            +
                #       guessed["type"] = "mysqldump_tab"
         | 
| 36 | 
            +
                #       guessed["property1"] = "guessed-value"
         | 
| 37 | 
            +
                #       return {"parser" => guessed}
         | 
| 38 | 
            +
                #     else
         | 
| 39 | 
            +
                #       return {}
         | 
| 40 | 
            +
                #     end
         | 
| 41 | 
            +
                #   end
         | 
| 42 | 
            +
                # end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                # class MysqldumpTab < LineGuessPlugin
         | 
| 45 | 
            +
                #   Plugin.register_guess("mysqldump_tab", self)
         | 
| 46 | 
            +
                #
         | 
| 47 | 
            +
                #   def guess_lines(config, sample_lines)
         | 
| 48 | 
            +
                #     all_line_matched = sample_lines.all? do |line|
         | 
| 49 | 
            +
                #       line =~ /mypattern/
         | 
| 50 | 
            +
                #     end
         | 
| 51 | 
            +
                #     if all_line_matched
         | 
| 52 | 
            +
                #       guessed = {}
         | 
| 53 | 
            +
                #       guessed["type"] = "mysqldump_tab"
         | 
| 54 | 
            +
                #       guessed["property1"] = "guessed-value"
         | 
| 55 | 
            +
                #       return {"parser" => guessed}
         | 
| 56 | 
            +
                #     else
         | 
| 57 | 
            +
                #       return {}
         | 
| 58 | 
            +
                #     end
         | 
| 59 | 
            +
                #   end
         | 
| 60 | 
            +
                # end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
              end
         | 
| 63 | 
            +
            end
         | 
| @@ -0,0 +1,105 @@ | |
| 1 | 
            +
            module Embulk
         | 
| 2 | 
            +
              module Parser
         | 
| 3 | 
            +
             | 
| 4 | 
            +
                class MysqldumpTab < ParserPlugin
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                  DUMMY_STRING         = "\v"
         | 
| 7 | 
            +
                  FIELDS_TERMINATED_BY = "\t"
         | 
| 8 | 
            +
                  FIELDS_ESCAPED_BY    = '\\'
         | 
| 9 | 
            +
                  FIELDS_ENCLOSED_BY   = ''
         | 
| 10 | 
            +
                  LINES_TERMINATED_BY  = "\n"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                  Plugin.register_parser("mysqldump_tab", self)
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                  def self.transaction(config, &control)
         | 
| 15 | 
            +
                    # configuration code:
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                    parser_task = config.load_config(Java::LineDecoder::DecoderTask)
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                    task = {
         | 
| 20 | 
            +
                      "decoder_task" => DataSource.from_java(parser_task.dump)
         | 
| 21 | 
            +
                      # "option1" => config.param("option1", :integer),                     # integer, required
         | 
| 22 | 
            +
                      # "option2" => config.param("option2", :string, default: "myvalue"),  # string, optional
         | 
| 23 | 
            +
                      # "option3" => config.param("option3", :string, default: nil),        # string, optional
         | 
| 24 | 
            +
                    }
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                    # https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
         | 
| 27 | 
            +
                    attributes = {}
         | 
| 28 | 
            +
                    columns = config.param(:columns, :array).map do |column|
         | 
| 29 | 
            +
                      name = column["name"]
         | 
| 30 | 
            +
                      type = column["type"].to_sym
         | 
| 31 | 
            +
                      attributes[name] = type
         | 
| 32 | 
            +
                      Column.new(nil, name, type, column["format"])
         | 
| 33 | 
            +
                    end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                    task[:attributes] = attributes
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                    # parser option
         | 
| 38 | 
            +
                    # task[:option1] = config['option1']
         | 
| 39 | 
            +
                    # task[:option1] = config.param(:option1, :integer, default: 5)
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                    yield(task, columns)
         | 
| 42 | 
            +
                  end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  def init
         | 
| 45 | 
            +
                    # initialization code:
         | 
| 46 | 
            +
                    # @option1 = task["option1"]
         | 
| 47 | 
            +
                    # @option2 = task["option2"]
         | 
| 48 | 
            +
                    # @option3 = task["option3"]
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                    @decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  def run(file_input)
         | 
| 54 | 
            +
                    decoder = Java::LineDecoder.new(file_input.instance_eval { @java_file_input }, @decoder_task)
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                    while decoder.nextFile
         | 
| 57 | 
            +
                      buffer = ''
         | 
| 58 | 
            +
                      while line = decoder.poll
         | 
| 59 | 
            +
                        buffer = buffer + line
         | 
| 60 | 
            +
                        if in_column?(line)
         | 
| 61 | 
            +
                          buffer = buffer.gsub(/#{Regexp.escape(FIELDS_ESCAPED_BY)}/, LINES_TERMINATED_BY)
         | 
| 62 | 
            +
                          next
         | 
| 63 | 
            +
                        end
         | 
| 64 | 
            +
                        cols = parse_line(buffer)
         | 
| 65 | 
            +
                        page_builder.add(cols)
         | 
| 66 | 
            +
                        buffer = ''
         | 
| 67 | 
            +
                      end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                      # When output has not ended
         | 
| 70 | 
            +
                      if buffer.length > 0
         | 
| 71 | 
            +
                          cols = parse_line(buffer)
         | 
| 72 | 
            +
                          page_builder.add(cols)
         | 
| 73 | 
            +
                      end
         | 
| 74 | 
            +
                    end
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                    page_builder.finish
         | 
| 77 | 
            +
                  end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                  def parse_line(line)
         | 
| 80 | 
            +
                    # Escape "escaped TAB" temporarily
         | 
| 81 | 
            +
                    line = line.gsub(/\\#{FIELDS_TERMINATED_BY}/, DUMMY_STRING)
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                    # Split with separator (TAB)
         | 
| 84 | 
            +
                    cols = line.split(FIELDS_TERMINATED_BY)
         | 
| 85 | 
            +
                    cols.map! { |item| item.gsub(/#{DUMMY_STRING}/, FIELDS_TERMINATED_BY) }
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                    len = task[:attributes].length
         | 
| 88 | 
            +
                    cols = adjust_column(cols, len)
         | 
| 89 | 
            +
                    return cols
         | 
| 90 | 
            +
                  end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                  def in_column?(line)
         | 
| 93 | 
            +
                    /#{Regexp.escape(FIELDS_ESCAPED_BY)}$/.match(line) ? true : false # escaped new line
         | 
| 94 | 
            +
                  end
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                  # Adjust array length
         | 
| 97 | 
            +
                  def adjust_column(arr, len)
         | 
| 98 | 
            +
                    arr = arr.slice(0, len) # Truncate if more than len
         | 
| 99 | 
            +
                    arr.fill(0, len) { |i| arr[i] } # If it is less than len, fill it with nil
         | 
| 100 | 
            +
                  end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
              end
         | 
| 105 | 
            +
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,95 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: embulk-parser-mysqldump_tab
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.1.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - inouet
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2017-10-06 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: embulk
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - '>='
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: 0.8.23
         | 
| 20 | 
            +
              type: :development
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - '>='
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: 0.8.23
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: bundler
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - '>='
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: 1.10.6
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - '>='
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: 1.10.6
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: rake
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - '>='
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '10.0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - '>='
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '10.0'
         | 
| 55 | 
            +
            description: Embulk parser plugin for mysqldump file that dumped with the --tab option.
         | 
| 56 | 
            +
            email:
         | 
| 57 | 
            +
            - inudog@gmail.com
         | 
| 58 | 
            +
            executables: []
         | 
| 59 | 
            +
            extensions: []
         | 
| 60 | 
            +
            extra_rdoc_files: []
         | 
| 61 | 
            +
            files:
         | 
| 62 | 
            +
            - .gitignore
         | 
| 63 | 
            +
            - .ruby-version
         | 
| 64 | 
            +
            - Gemfile
         | 
| 65 | 
            +
            - LICENSE.txt
         | 
| 66 | 
            +
            - README.md
         | 
| 67 | 
            +
            - Rakefile
         | 
| 68 | 
            +
            - embulk-parser-mysqldump_tab.gemspec
         | 
| 69 | 
            +
            - lib/embulk/guess/mysqldump_tab.rb
         | 
| 70 | 
            +
            - lib/embulk/parser/mysqldump_tab.rb
         | 
| 71 | 
            +
            homepage: https://github.com/inouet/embulk-parser-mysqldump_tab
         | 
| 72 | 
            +
            licenses:
         | 
| 73 | 
            +
            - MIT
         | 
| 74 | 
            +
            metadata: {}
         | 
| 75 | 
            +
            post_install_message: 
         | 
| 76 | 
            +
            rdoc_options: []
         | 
| 77 | 
            +
            require_paths:
         | 
| 78 | 
            +
            - lib
         | 
| 79 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 80 | 
            +
              requirements:
         | 
| 81 | 
            +
              - - '>='
         | 
| 82 | 
            +
                - !ruby/object:Gem::Version
         | 
| 83 | 
            +
                  version: '0'
         | 
| 84 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 85 | 
            +
              requirements:
         | 
| 86 | 
            +
              - - '>='
         | 
| 87 | 
            +
                - !ruby/object:Gem::Version
         | 
| 88 | 
            +
                  version: '0'
         | 
| 89 | 
            +
            requirements: []
         | 
| 90 | 
            +
            rubyforge_project: 
         | 
| 91 | 
            +
            rubygems_version: 2.0.14.1
         | 
| 92 | 
            +
            signing_key: 
         | 
| 93 | 
            +
            specification_version: 4
         | 
| 94 | 
            +
            summary: Mysqldump Tab parser plugin for Embulk
         | 
| 95 | 
            +
            test_files: []
         |