embulk-filter-split_column 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7296372eb3a2f58d15e07ebdd2afeae7c5a22589
4
- data.tar.gz: 2bc5b5a43317e58ec8aa59eabeff32b1277f8de5
3
+ metadata.gz: 53a619abdc297f96cee13340709ceee54c3ce911
4
+ data.tar.gz: b92e6154f1481d505e5d03d850b1041ccc846dd2
5
5
  SHA512:
6
- metadata.gz: c5e55cda448c31d069a401f7032997392436812b5dc9fc4c52d082ba95b1ddc73c6ae9027a7bb68a446365af3a0ad542acde25c455b08e56da3eea1a462cb648
7
- data.tar.gz: 0b341b2cf82a6262a5183fcc4bb5368a76809837659fa62c7b2f2f46e5b58bc23f891f709d7d1f22ba193196f1ddc8c430dd6ab6ab0ac066e5f548f5a663a576
6
+ metadata.gz: 8be926fd3253406702e7816b802d7a56af41259f5dbd35e50f9fe9934fa9a659311418459fc733d2b5e925425d9233e8104a96b691fd536a12bf663407d73669
7
+ data.tar.gz: 59f4001fa6fffaa405eeb2f1d8b4d1bce44af286c6d8f59a34a05638f0edcd94254c8a1efdaffa0d2af5511de8e23870faba411646a3ab47347ff08318982b63
data/README.md CHANGED
@@ -82,6 +82,7 @@ filters:
82
82
  - 0.1.0: first release
83
83
  - 0.1.1: bugfix
84
84
  - 0.1.2: add config option 'is_skip'
85
+ - 0.1.3: add the failing column value to the skip log and exception messages
85
86
 
86
87
  ## Build
87
88
 
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.2"
16
+ version = "0.1.3"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -107,23 +107,26 @@ public class SplitColumnFilterPlugin
107
107
  int rowNum = 0;
108
108
  while (reader.nextRecord()) {
109
109
  rowNum++;
110
- String[] words = StringUtils.split(reader.getString(targetColumn),task.getDelimiter());
110
+ String targetColumnValue = reader.getString(targetColumn);
111
+ String[] words = StringUtils.split(targetColumnValue, task.getDelimiter());
111
112
  SchemaConfig outputSchemaConfig = task.getOutputColumns();
112
113
  // check split values
113
114
  if (outputSchemaConfig.size() != words.length) {
114
115
  Boolean isSkip = task.getIsSkip().get();
115
116
  if (isSkip.booleanValue()) {
116
- String message = String.format("Skipped line %d: outputColumn has %d columns but value was separated in %d",
117
+ String message = String.format("Skipped line %d: output_column has %d columns but value was separated in %d: \"%s\"",
117
118
  rowNum,
118
119
  outputSchemaConfig.size(),
119
- words.length
120
+ words.length,
121
+ targetColumnValue
120
122
  );
121
123
  log.warn(message);
122
124
  continue;
123
125
  } else {
124
- String message = String.format("outputColumn has %d columns but value was separated in %d",
126
+ String message = String.format("output_column has %d columns but value was separated in %d: \"%s\"",
125
127
  outputSchemaConfig.size(),
126
- words.length
128
+ words.length,
129
+ targetColumnValue
127
130
  );
128
131
  throw new SplitColumnValidateException(message);
129
132
  }
@@ -0,0 +1,5 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
4
+ 3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
5
+ 4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/example.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
@@ -0,0 +1,3 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/few.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
@@ -0,0 +1,3 @@
1
+ id,account,time,purchase,comment
2
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26|a
3
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
@@ -0,0 +1,26 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/too_large.csv
4
+ parser:
5
+ type: csv
6
+ delimiter: ','
7
+ skip_header_lines: 1
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: account, type: long}
11
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
12
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
13
+ - {name: comment, type: string}
14
+ filters:
15
+ - type: split_column
16
+ delimiter: '|'
17
+ is_skip: true
18
+ target_key: comment
19
+ output_columns:
20
+ - {name: alph, type: string}
21
+ - {name: num, type: long}
22
+ - {name: dbl, type: double}
23
+ - {name: bool, type: boolean}
24
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
25
+ out:
26
+ type: stdout
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-split_column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yskn67
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-18 00:00:00.000000000 Z
11
+ date: 2017-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,7 +58,13 @@ files:
58
58
  - lib/embulk/filter/split_column.rb
59
59
  - src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
60
60
  - src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
61
- - classpath/embulk-filter-split_column-0.1.2.jar
61
+ - src/test/resources/example.csv
62
+ - src/test/resources/example.yml
63
+ - src/test/resources/few.csv
64
+ - src/test/resources/few.yml
65
+ - src/test/resources/too_large.csv
66
+ - src/test/resources/too_large.yml
67
+ - classpath/embulk-filter-split_column-0.1.3.jar
62
68
  homepage: https://github.com/yskn67/embulk-filter-split_column
63
69
  licenses:
64
70
  - MIT