embulk-filter-split_column 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java +8 -5
- data/src/test/resources/example.csv +5 -0
- data/src/test/resources/example.yml +26 -0
- data/src/test/resources/few.csv +3 -0
- data/src/test/resources/few.yml +26 -0
- data/src/test/resources/too_large.csv +3 -0
- data/src/test/resources/too_large.yml +26 -0
- metadata +9 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 53a619abdc297f96cee13340709ceee54c3ce911
         | 
| 4 | 
            +
              data.tar.gz: b92e6154f1481d505e5d03d850b1041ccc846dd2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 8be926fd3253406702e7816b802d7a56af41259f5dbd35e50f9fe9934fa9a659311418459fc733d2b5e925425d9233e8104a96b691fd536a12bf663407d73669
         | 
| 7 | 
            +
              data.tar.gz: 59f4001fa6fffaa405eeb2f1d8b4d1bce44af286c6d8f59a34a05638f0edcd94254c8a1efdaffa0d2af5511de8e23870faba411646a3ab47347ff08318982b63
         | 
    
        data/README.md
    CHANGED
    
    
    
        data/build.gradle
    CHANGED
    
    
| @@ -107,23 +107,26 @@ public class SplitColumnFilterPlugin | |
| 107 107 | 
             
                            int rowNum = 0;
         | 
| 108 108 | 
             
                            while (reader.nextRecord()) {
         | 
| 109 109 | 
             
                                rowNum++;
         | 
| 110 | 
            -
                                String | 
| 110 | 
            +
                                String targetColumnValue = reader.getString(targetColumn);
         | 
| 111 | 
            +
                                String[] words = StringUtils.split(targetColumnValue, task.getDelimiter());
         | 
| 111 112 | 
             
                                SchemaConfig outputSchemaConfig = task.getOutputColumns();
         | 
| 112 113 | 
             
                                // check split values
         | 
| 113 114 | 
             
                                if (outputSchemaConfig.size() != words.length) {
         | 
| 114 115 | 
             
                                    Boolean isSkip = task.getIsSkip().get();
         | 
| 115 116 | 
             
                                    if (isSkip.booleanValue()) {
         | 
| 116 | 
            -
                                        String message = String.format("Skipped line %d:  | 
| 117 | 
            +
                                        String message = String.format("Skipped line %d: output_column has %d columns but value was separated in %d: \"%s\"",
         | 
| 117 118 | 
             
                                            rowNum,
         | 
| 118 119 | 
             
                                            outputSchemaConfig.size(),
         | 
| 119 | 
            -
                                            words.length
         | 
| 120 | 
            +
                                            words.length,
         | 
| 121 | 
            +
                                            targetColumnValue
         | 
| 120 122 | 
             
                                        );
         | 
| 121 123 | 
             
                                        log.warn(message);
         | 
| 122 124 | 
             
                                        continue;
         | 
| 123 125 | 
             
                                    } else {
         | 
| 124 | 
            -
                                        String message = String.format(" | 
| 126 | 
            +
                                        String message = String.format("output_column has %d columns but value was separated in %d: \"%s\"",
         | 
| 125 127 | 
             
                                            outputSchemaConfig.size(),
         | 
| 126 | 
            -
                                            words.length
         | 
| 128 | 
            +
                                            words.length,
         | 
| 129 | 
            +
                                            targetColumnValue
         | 
| 127 130 | 
             
                                        );
         | 
| 128 131 | 
             
                                        throw new SplitColumnValidateException(message);
         | 
| 129 132 | 
             
                                    }
         | 
| @@ -0,0 +1,5 @@ | |
| 1 | 
            +
            id,account,time,purchase,comment
         | 
| 2 | 
            +
            1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
         | 
| 3 | 
            +
            2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
         | 
| 4 | 
            +
            3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
         | 
| 5 | 
            +
            4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            in:
         | 
| 2 | 
            +
              type: file
         | 
| 3 | 
            +
              path_prefix: ./src/test/resources/example.csv
         | 
| 4 | 
            +
              parser:
         | 
| 5 | 
            +
                type: csv
         | 
| 6 | 
            +
                delimiter: ','
         | 
| 7 | 
            +
                skip_header_lines: 1
         | 
| 8 | 
            +
                columns:
         | 
| 9 | 
            +
                - {name: id, type: long}
         | 
| 10 | 
            +
                - {name: account, type: long}
         | 
| 11 | 
            +
                - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
         | 
| 12 | 
            +
                - {name: purchase, type: timestamp, format: '%Y%m%d'}
         | 
| 13 | 
            +
                - {name: comment, type: string}
         | 
| 14 | 
            +
            filters:
         | 
| 15 | 
            +
              - type: split_column
         | 
| 16 | 
            +
                delimiter: '|'
         | 
| 17 | 
            +
                is_skip: true
         | 
| 18 | 
            +
                target_key: comment
         | 
| 19 | 
            +
                output_columns:
         | 
| 20 | 
            +
                  - {name: alph, type: string}
         | 
| 21 | 
            +
                  - {name: num, type: long}
         | 
| 22 | 
            +
                  - {name: dbl, type: double}
         | 
| 23 | 
            +
                  - {name: bool, type: boolean}
         | 
| 24 | 
            +
                  - {name: ts, type: timestamp, format: '%Y-%m-%d'}
         | 
| 25 | 
            +
            out:
         | 
| 26 | 
            +
              type: stdout
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            in:
         | 
| 2 | 
            +
              type: file
         | 
| 3 | 
            +
              path_prefix: ./src/test/resources/few.csv
         | 
| 4 | 
            +
              parser:
         | 
| 5 | 
            +
                type: csv
         | 
| 6 | 
            +
                delimiter: ','
         | 
| 7 | 
            +
                skip_header_lines: 1
         | 
| 8 | 
            +
                columns:
         | 
| 9 | 
            +
                - {name: id, type: long}
         | 
| 10 | 
            +
                - {name: account, type: long}
         | 
| 11 | 
            +
                - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
         | 
| 12 | 
            +
                - {name: purchase, type: timestamp, format: '%Y%m%d'}
         | 
| 13 | 
            +
                - {name: comment, type: string}
         | 
| 14 | 
            +
            filters:
         | 
| 15 | 
            +
              - type: split_column
         | 
| 16 | 
            +
                delimiter: '|'
         | 
| 17 | 
            +
                is_skip: true
         | 
| 18 | 
            +
                target_key: comment
         | 
| 19 | 
            +
                output_columns:
         | 
| 20 | 
            +
                  - {name: alph, type: string}
         | 
| 21 | 
            +
                  - {name: num, type: long}
         | 
| 22 | 
            +
                  - {name: dbl, type: double}
         | 
| 23 | 
            +
                  - {name: bool, type: boolean}
         | 
| 24 | 
            +
                  - {name: ts, type: timestamp, format: '%Y-%m-%d'}
         | 
| 25 | 
            +
            out:
         | 
| 26 | 
            +
              type: stdout
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            in:
         | 
| 2 | 
            +
              type: file
         | 
| 3 | 
            +
              path_prefix: ./src/test/resources/too_large.csv
         | 
| 4 | 
            +
              parser:
         | 
| 5 | 
            +
                type: csv
         | 
| 6 | 
            +
                delimiter: ','
         | 
| 7 | 
            +
                skip_header_lines: 1
         | 
| 8 | 
            +
                columns:
         | 
| 9 | 
            +
                - {name: id, type: long}
         | 
| 10 | 
            +
                - {name: account, type: long}
         | 
| 11 | 
            +
                - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
         | 
| 12 | 
            +
                - {name: purchase, type: timestamp, format: '%Y%m%d'}
         | 
| 13 | 
            +
                - {name: comment, type: string}
         | 
| 14 | 
            +
            filters:
         | 
| 15 | 
            +
              - type: split_column
         | 
| 16 | 
            +
                delimiter: '|'
         | 
| 17 | 
            +
                is_skip: true
         | 
| 18 | 
            +
                target_key: comment
         | 
| 19 | 
            +
                output_columns:
         | 
| 20 | 
            +
                  - {name: alph, type: string}
         | 
| 21 | 
            +
                  - {name: num, type: long}
         | 
| 22 | 
            +
                  - {name: dbl, type: double}
         | 
| 23 | 
            +
                  - {name: bool, type: boolean}
         | 
| 24 | 
            +
                  - {name: ts, type: timestamp, format: '%Y-%m-%d'}
         | 
| 25 | 
            +
            out:
         | 
| 26 | 
            +
              type: stdout
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: embulk-filter-split_column
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.2 | 
| 4 | 
            +
              version: 0.1.3
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - yskn67
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2017-09-27 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -58,7 +58,13 @@ files: | |
| 58 58 | 
             
            - lib/embulk/filter/split_column.rb
         | 
| 59 59 | 
             
            - src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
         | 
| 60 60 | 
             
            - src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
         | 
| 61 | 
            -
            -  | 
| 61 | 
            +
            - src/test/resources/example.csv
         | 
| 62 | 
            +
            - src/test/resources/example.yml
         | 
| 63 | 
            +
            - src/test/resources/few.csv
         | 
| 64 | 
            +
            - src/test/resources/few.yml
         | 
| 65 | 
            +
            - src/test/resources/too_large.csv
         | 
| 66 | 
            +
            - src/test/resources/too_large.yml
         | 
| 67 | 
            +
            - classpath/embulk-filter-split_column-0.1.3.jar
         | 
| 62 68 | 
             
            homepage: https://github.com/yskn67/embulk-filter-split_column
         | 
| 63 69 | 
             
            licenses:
         | 
| 64 70 | 
             
            - MIT
         |