embulk-guess-csv_verify 0.10.29-java → 0.10.30-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/aopalliance-1.0.jar +0 -0
- data/classpath/bval-core-0.5.jar +0 -0
- data/classpath/bval-jsr303-0.5.jar +0 -0
- data/classpath/commons-beanutils-core-1.8.3.jar +0 -0
- data/classpath/commons-lang3-3.4.jar +0 -0
- data/classpath/embulk-api-0.10.30.jar +0 -0
- data/classpath/embulk-core-0.10.30.jar +0 -0
- data/classpath/embulk-guess-csv-0.10.30.jar +0 -0
- data/classpath/embulk-guess-csv_verify-0.10.30.jar +0 -0
- data/classpath/{embulk-parser-csv-0.10.29.jar → embulk-parser-csv-0.10.30.jar} +0 -0
- data/classpath/embulk-spi-0.10.30.jar +0 -0
- data/classpath/guava-18.0.jar +0 -0
- data/classpath/guice-4.0.jar +0 -0
- data/classpath/guice-multibindings-4.0.jar +0 -0
- data/classpath/jackson-datatype-guava-2.6.7.jar +0 -0
- data/classpath/jackson-module-guice-2.6.7.jar +0 -0
- data/classpath/javax.inject-1.jar +0 -0
- data/classpath/msgpack-core-0.8.11.jar +0 -0
- data/classpath/slf4j-api-1.7.30.jar +0 -0
- data/lib/embulk/guess/csv_verify.rb +20 -30
- metadata +24 -6
- data/classpath/embulk-guess-csv-0.10.29.jar +0 -0
- data/classpath/embulk-guess-csv_verify-0.10.29.jar +0 -0
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: c980881f2e59cd43b69eebe56691108f9a9ff2436bd9d2446b2b22330bf8f88c
         | 
| 4 | 
            +
              data.tar.gz: da1cf6e385c0b77d8b3d1aca9076e5ab5f2a44e81ceb357207837bcfd255de0f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: a3c223339d066fd2a4241232cdd6362e0836d051eb7a209353bbb4879c800fcc561ede18005b4c257e062119271e000f36cf3972afbce164f37fb7b6e51fad86
         | 
| 7 | 
            +
              data.tar.gz: 61a444758472772ff192f932dabe6ed9fea417c413ec03203782931a0efe0d44f270686cb546870ec7bdc6a732edd89df27d97605e23a2d81838c08eb4e95511
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| @@ -25,13 +25,13 @@ module Embulk | |
| 25 25 | 
             
                  CONFIG_MAPPER_FACTORY_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.ConfigMapperFactory").ruby_class
         | 
| 26 26 | 
             
                  TYPE_MODULE_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.modules.TypeModule").ruby_class
         | 
| 27 27 | 
             
                  CONFIG_MAPPER_FACTORY = CONFIG_MAPPER_FACTORY_CLASS.builder.addDefaultModules.addModule(TYPE_MODULE_CLASS.new).build
         | 
| 28 | 
            -
                   | 
| 28 | 
            +
                  LEGACY_PLUGIN_TASK_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvParserPlugin$PluginTask")
         | 
| 29 29 | 
             
                  LIST_FILE_INPUT_CLASS = CLASSLOADER.loadClass("org.embulk.util.file.ListFileInput").ruby_class
         | 
| 30 30 | 
             
                  LINE_DECODER_CLASS = CLASSLOADER.loadClass("org.embulk.util.text.LineDecoder").ruby_class
         | 
| 31 31 | 
             
                  CSV_GUESS_PLUGIN_CLASS = CLASSLOADER.loadClass("org.embulk.guess.csv.CsvGuessPlugin").ruby_class
         | 
| 32 | 
            -
                   | 
| 33 | 
            -
                   | 
| 34 | 
            -
                   | 
| 32 | 
            +
                  LEGACY_CSV_TOKENIZER_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvTokenizer").ruby_class
         | 
| 33 | 
            +
                  LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$TooFewColumnsException").ruby_class
         | 
| 34 | 
            +
                  LEGACY_INVALID_VALUE_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$InvalidValueException").ruby_class
         | 
| 35 35 |  | 
| 36 36 | 
             
                  DELIMITER_CANDIDATES = [
         | 
| 37 37 | 
             
                    ",", "\t", "|", ";"
         | 
| @@ -71,7 +71,9 @@ module Embulk | |
| 71 71 | 
             
                      end
         | 
| 72 72 | 
             
                      guessed_ruby_converted = config_to_java(guessed_ruby)
         | 
| 73 73 | 
             
                      if !guessed_java.equals(guessed_ruby_converted)
         | 
| 74 | 
            -
                         | 
| 74 | 
            +
                        log_guess_diff(guessed_ruby, guessed_java, "decoders")
         | 
| 75 | 
            +
                        log_guess_diff(guessed_ruby, guessed_java, "parser")
         | 
| 76 | 
            +
                        raise "embulk-guess-csv has difference between Java/Ruby."
         | 
| 75 77 | 
             
                      end
         | 
| 76 78 | 
             
                    rescue Exception => e
         | 
| 77 79 | 
             
                      # Any error from the Java-based guess plugin should pass-through just with logging.
         | 
| @@ -237,34 +239,23 @@ module Embulk | |
| 237 239 |  | 
| 238 240 | 
             
                  private
         | 
| 239 241 |  | 
| 240 | 
            -
                  def  | 
| 241 | 
            -
                    guessed_ruby = guessed_ruby_entire[ | 
| 242 | 
            -
                    guessed_java = guessed_java_entire.getNestedOrGetEmpty( | 
| 243 | 
            -
             | 
| 244 | 
            -
                    require 'set'
         | 
| 245 | 
            -
                    keys = Set.new(guessed_ruby.keys) + Set.new(guessed_java.getAttributeNames)
         | 
| 242 | 
            +
                  def log_guess_diff(guessed_ruby_entire, guessed_java_entire, key)
         | 
| 243 | 
            +
                    guessed_ruby = guessed_ruby_entire[key] || {}
         | 
| 244 | 
            +
                    guessed_java = guessed_java_entire.getNestedOrGetEmpty(key)
         | 
| 246 245 |  | 
| 247 246 | 
             
                    begin
         | 
| 248 247 | 
             
                      require 'json'
         | 
| 249 248 | 
             
                    rescue LoadError
         | 
| 250 | 
            -
                       | 
| 251 | 
            -
                      guessed_java_hash = nil
         | 
| 249 | 
            +
                      raise "The 'json' gem is not installed. No details compared."
         | 
| 252 250 | 
             
                    else
         | 
| 253 251 | 
             
                      guessed_java_hash = JSON.parse(guessed_java.toJson)
         | 
| 254 252 | 
             
                    end
         | 
| 255 253 |  | 
| 256 | 
            -
                     | 
| 257 | 
            -
             | 
| 258 | 
            -
                       | 
| 259 | 
            -
             | 
| 260 | 
            -
                      elsif !guessed_java.has(key.to_java)
         | 
| 261 | 
            -
                        diffs << "Only embulk-guess-csv (Ruby) has: \"#{key}\""
         | 
| 262 | 
            -
                      elsif guessed_java_hash && guessed_ruby[key] != guessed_java_hash[key]
         | 
| 263 | 
            -
                        diffs << "embulk-guess-csv has difference between Java/Ruby: \"#{key}\""
         | 
| 264 | 
            -
                      end
         | 
| 254 | 
            +
                    if guessed_java_hash && guessed_ruby != guessed_java_hash
         | 
| 255 | 
            +
                      Embulk.logger.error "[Embulk CSV guess verify] '#{key}' has difference."
         | 
| 256 | 
            +
                      Embulk.logger.error "[Embulk CSV guess verify] Java => #{guessed_java_hash.to_json}"
         | 
| 257 | 
            +
                      Embulk.logger.error "[Embulk CSV guess verify] Ruby => #{guessed_ruby.to_json}"
         | 
| 265 258 | 
             
                    end
         | 
| 266 | 
            -
             | 
| 267 | 
            -
                    raise "embulk-guess-csv has difference between Java/Ruby: #{diffs.inspect}"
         | 
| 268 259 | 
             
                  end
         | 
| 269 260 |  | 
| 270 261 | 
             
                  def config_to_java(config_ruby)
         | 
| @@ -289,12 +280,11 @@ module Embulk | |
| 289 280 | 
             
                  def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
         | 
| 290 281 | 
             
                    null_string = parser_config["null_string"]
         | 
| 291 282 | 
             
                    config = parser_config.merge(extra_config).merge({"charset" => "UTF-8", "columns" => []})
         | 
| 292 | 
            -
                    parser_task =  | 
| 283 | 
            +
                    parser_task = config.load_config(LEGACY_PLUGIN_TASK_CLASS)
         | 
| 293 284 | 
             
                    data = sample_lines.map {|line| line.force_encoding('UTF-8') }.join(parser_task.getNewline.getString.encode('UTF-8'))
         | 
| 294 285 | 
             
                    sample = Buffer.from_ruby_string(data)
         | 
| 295 | 
            -
                    decoder =  | 
| 296 | 
            -
             | 
| 297 | 
            -
                    tokenizer = CSV_TOKENIZER_CLASS.new(decoder, parser_task)
         | 
| 286 | 
            +
                    decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
         | 
| 287 | 
            +
                    tokenizer = LEGACY_CSV_TOKENIZER_CLASS.new(decoder, parser_task)
         | 
| 298 288 | 
             
                    rows = []
         | 
| 299 289 | 
             
                    while tokenizer.nextFile
         | 
| 300 290 | 
             
                      while tokenizer.nextRecord(skip_empty_lines)
         | 
| @@ -308,12 +298,12 @@ module Embulk | |
| 308 298 | 
             
                                column = nil
         | 
| 309 299 | 
             
                              end
         | 
| 310 300 | 
             
                              columns << column
         | 
| 311 | 
            -
                            rescue  | 
| 301 | 
            +
                            rescue LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS
         | 
| 312 302 | 
             
                              rows << columns
         | 
| 313 303 | 
             
                              break
         | 
| 314 304 | 
             
                            end
         | 
| 315 305 | 
             
                          end
         | 
| 316 | 
            -
                        rescue  | 
| 306 | 
            +
                        rescue LEGACY_INVALID_VALUE_EXCEPTION_CLASS
         | 
| 317 307 | 
             
                          # TODO warning
         | 
| 318 308 | 
             
                          tokenizer.skipCurrentLine
         | 
| 319 309 | 
             
                        end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: embulk-guess-csv_verify
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.10. | 
| 4 | 
            +
              version: 0.10.30
         | 
| 5 5 | 
             
            platform: java
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Sadayuki Furuhashi
         | 
| @@ -10,19 +10,29 @@ authors: | |
| 10 10 | 
             
            autorequire:
         | 
| 11 11 | 
             
            bindir: bin
         | 
| 12 12 | 
             
            cert_chain: []
         | 
| 13 | 
            -
            date: 2021-04- | 
| 13 | 
            +
            date: 2021-04-15 00:00:00.000000000 Z
         | 
| 14 14 | 
             
            dependencies: []
         | 
| 15 15 | 
             
            description: Verification-purpose Embulk CSV guess plugin to compare the old Ruby-based
         | 
| 16 | 
            -
              one and the new Java-based one (not for your production use | 
| 16 | 
            +
              one and the new Java-based one (not for your production use; note that 'decoders'
         | 
| 17 | 
            +
              and 'parser' sections in your configuration can be logged even if they contain confidential
         | 
| 18 | 
            +
              information)
         | 
| 17 19 | 
             
            email:
         | 
| 18 20 | 
             
            - dmikurube@treasure-data.com
         | 
| 19 21 | 
             
            executables: []
         | 
| 20 22 | 
             
            extensions: []
         | 
| 21 23 | 
             
            extra_rdoc_files: []
         | 
| 22 24 | 
             
            files:
         | 
| 23 | 
            -
            - classpath/ | 
| 24 | 
            -
            - classpath/ | 
| 25 | 
            -
            - classpath/ | 
| 25 | 
            +
            - classpath/aopalliance-1.0.jar
         | 
| 26 | 
            +
            - classpath/bval-core-0.5.jar
         | 
| 27 | 
            +
            - classpath/bval-jsr303-0.5.jar
         | 
| 28 | 
            +
            - classpath/commons-beanutils-core-1.8.3.jar
         | 
| 29 | 
            +
            - classpath/commons-lang3-3.4.jar
         | 
| 30 | 
            +
            - classpath/embulk-api-0.10.30.jar
         | 
| 31 | 
            +
            - classpath/embulk-core-0.10.30.jar
         | 
| 32 | 
            +
            - classpath/embulk-guess-csv-0.10.30.jar
         | 
| 33 | 
            +
            - classpath/embulk-guess-csv_verify-0.10.30.jar
         | 
| 34 | 
            +
            - classpath/embulk-parser-csv-0.10.30.jar
         | 
| 35 | 
            +
            - classpath/embulk-spi-0.10.30.jar
         | 
| 26 36 | 
             
            - classpath/embulk-util-config-0.2.1.jar
         | 
| 27 37 | 
             
            - classpath/embulk-util-file-0.1.3.jar
         | 
| 28 38 | 
             
            - classpath/embulk-util-guess-0.1.1.jar
         | 
| @@ -30,11 +40,19 @@ files: | |
| 30 40 | 
             
            - classpath/embulk-util-rubytime-0.3.2.jar
         | 
| 31 41 | 
             
            - classpath/embulk-util-text-0.1.0.jar
         | 
| 32 42 | 
             
            - classpath/embulk-util-timestamp-0.2.1.jar
         | 
| 43 | 
            +
            - classpath/guava-18.0.jar
         | 
| 44 | 
            +
            - classpath/guice-4.0.jar
         | 
| 45 | 
            +
            - classpath/guice-multibindings-4.0.jar
         | 
| 33 46 | 
             
            - classpath/icu4j-54.1.1.jar
         | 
| 34 47 | 
             
            - classpath/jackson-annotations-2.6.7.jar
         | 
| 35 48 | 
             
            - classpath/jackson-core-2.6.7.jar
         | 
| 36 49 | 
             
            - classpath/jackson-databind-2.6.7.jar
         | 
| 50 | 
            +
            - classpath/jackson-datatype-guava-2.6.7.jar
         | 
| 37 51 | 
             
            - classpath/jackson-datatype-jdk8-2.6.7.jar
         | 
| 52 | 
            +
            - classpath/jackson-module-guice-2.6.7.jar
         | 
| 53 | 
            +
            - classpath/javax.inject-1.jar
         | 
| 54 | 
            +
            - classpath/msgpack-core-0.8.11.jar
         | 
| 55 | 
            +
            - classpath/slf4j-api-1.7.30.jar
         | 
| 38 56 | 
             
            - classpath/validation-api-1.1.0.Final.jar
         | 
| 39 57 | 
             
            - lib/embulk/guess/csv_verify.rb
         | 
| 40 58 | 
             
            homepage: https://github.com/embulk/embulk
         | 
| Binary file | 
| Binary file |