embulk-filter-split_column 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7296372eb3a2f58d15e07ebdd2afeae7c5a22589
4
- data.tar.gz: 2bc5b5a43317e58ec8aa59eabeff32b1277f8de5
3
+ metadata.gz: 53a619abdc297f96cee13340709ceee54c3ce911
4
+ data.tar.gz: b92e6154f1481d505e5d03d850b1041ccc846dd2
5
5
  SHA512:
6
- metadata.gz: c5e55cda448c31d069a401f7032997392436812b5dc9fc4c52d082ba95b1ddc73c6ae9027a7bb68a446365af3a0ad542acde25c455b08e56da3eea1a462cb648
7
- data.tar.gz: 0b341b2cf82a6262a5183fcc4bb5368a76809837659fa62c7b2f2f46e5b58bc23f891f709d7d1f22ba193196f1ddc8c430dd6ab6ab0ac066e5f548f5a663a576
6
+ metadata.gz: 8be926fd3253406702e7816b802d7a56af41259f5dbd35e50f9fe9934fa9a659311418459fc733d2b5e925425d9233e8104a96b691fd536a12bf663407d73669
7
+ data.tar.gz: 59f4001fa6fffaa405eeb2f1d8b4d1bce44af286c6d8f59a34a05638f0edcd94254c8a1efdaffa0d2af5511de8e23870faba411646a3ab47347ff08318982b63
data/README.md CHANGED
@@ -82,6 +82,7 @@ filters:
82
82
  - 0.1.0: first release
83
83
  - 0.1.1: bugfix
84
84
  - 0.1.2: add config option 'is_skip'
85
+ - 0.1.3: include the failing column value in the skip warning and exception messages
85
86
 
86
87
  ## Build
87
88
 
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.2"
16
+ version = "0.1.3"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -107,23 +107,26 @@ public class SplitColumnFilterPlugin
107
107
  int rowNum = 0;
108
108
  while (reader.nextRecord()) {
109
109
  rowNum++;
110
- String[] words = StringUtils.split(reader.getString(targetColumn),task.getDelimiter());
110
+ String targetColumnValue = reader.getString(targetColumn);
111
+ String[] words = StringUtils.split(targetColumnValue, task.getDelimiter());
111
112
  SchemaConfig outputSchemaConfig = task.getOutputColumns();
112
113
  // check split values
113
114
  if (outputSchemaConfig.size() != words.length) {
114
115
  Boolean isSkip = task.getIsSkip().get();
115
116
  if (isSkip.booleanValue()) {
116
- String message = String.format("Skipped line %d: outputColumn has %d columns but value was separated in %d",
117
+ String message = String.format("Skipped line %d: output_column has %d columns but value was separated in %d: \"%s\"",
117
118
  rowNum,
118
119
  outputSchemaConfig.size(),
119
- words.length
120
+ words.length,
121
+ targetColumnValue
120
122
  );
121
123
  log.warn(message);
122
124
  continue;
123
125
  } else {
124
- String message = String.format("outputColumn has %d columns but value was separated in %d",
126
+ String message = String.format("output_column has %d columns but value was separated in %d: \"%s\"",
125
127
  outputSchemaConfig.size(),
126
- words.length
128
+ words.length,
129
+ targetColumnValue
127
130
  );
128
131
  throw new SplitColumnValidateException(message);
129
132
  }
@@ -0,0 +1,5 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
4
+ 3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
5
+ 4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/example.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
@@ -0,0 +1,3 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/few.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
@@ -0,0 +1,3 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26|a
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/too_large.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-split_column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yskn67
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-18 00:00:00.000000000 Z
11
+ date: 2017-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,7 +58,13 @@ files:
58
58
  - lib/embulk/filter/split_column.rb
59
59
  - src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
60
60
  - src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
61
- - classpath/embulk-filter-split_column-0.1.2.jar
61
+ - src/test/resources/example.csv
62
+ - src/test/resources/example.yml
63
+ - src/test/resources/few.csv
64
+ - src/test/resources/few.yml
65
+ - src/test/resources/too_large.csv
66
+ - src/test/resources/too_large.yml
67
+ - classpath/embulk-filter-split_column-0.1.3.jar
62
68
  homepage: https://github.com/yskn67/embulk-filter-split_column
63
69
  licenses:
64
70
  - MIT