embulk-filter-split_column 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java +8 -5
- data/src/test/resources/example.csv +5 -0
- data/src/test/resources/example.yml +26 -0
- data/src/test/resources/few.csv +3 -0
- data/src/test/resources/few.yml +26 -0
- data/src/test/resources/too_large.csv +3 -0
- data/src/test/resources/too_large.yml +26 -0
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53a619abdc297f96cee13340709ceee54c3ce911
|
4
|
+
data.tar.gz: b92e6154f1481d505e5d03d850b1041ccc846dd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8be926fd3253406702e7816b802d7a56af41259f5dbd35e50f9fe9934fa9a659311418459fc733d2b5e925425d9233e8104a96b691fd536a12bf663407d73669
|
7
|
+
data.tar.gz: 59f4001fa6fffaa405eeb2f1d8b4d1bce44af286c6d8f59a34a05638f0edcd94254c8a1efdaffa0d2af5511de8e23870faba411646a3ab47347ff08318982b63
|
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -107,23 +107,26 @@ public class SplitColumnFilterPlugin
|
|
107
107
|
int rowNum = 0;
|
108
108
|
while (reader.nextRecord()) {
|
109
109
|
rowNum++;
|
110
|
-
String
|
110
|
+
String targetColumnValue = reader.getString(targetColumn);
|
111
|
+
String[] words = StringUtils.split(targetColumnValue, task.getDelimiter());
|
111
112
|
SchemaConfig outputSchemaConfig = task.getOutputColumns();
|
112
113
|
// check split values
|
113
114
|
if (outputSchemaConfig.size() != words.length) {
|
114
115
|
Boolean isSkip = task.getIsSkip().get();
|
115
116
|
if (isSkip.booleanValue()) {
|
116
|
-
String message = String.format("Skipped line %d:
|
117
|
+
String message = String.format("Skipped line %d: output_column has %d columns but value was separated in %d: \"%s\"",
|
117
118
|
rowNum,
|
118
119
|
outputSchemaConfig.size(),
|
119
|
-
words.length
|
120
|
+
words.length,
|
121
|
+
targetColumnValue
|
120
122
|
);
|
121
123
|
log.warn(message);
|
122
124
|
continue;
|
123
125
|
} else {
|
124
|
-
String message = String.format("
|
126
|
+
String message = String.format("output_column has %d columns but value was separated in %d: \"%s\"",
|
125
127
|
outputSchemaConfig.size(),
|
126
|
-
words.length
|
128
|
+
words.length,
|
129
|
+
targetColumnValue
|
127
130
|
);
|
128
131
|
throw new SplitColumnValidateException(message);
|
129
132
|
}
|
@@ -0,0 +1,5 @@
|
|
1
|
+
id,account,time,purchase,comment
|
2
|
+
1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
|
3
|
+
2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
|
4
|
+
3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
|
5
|
+
4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/few.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/too_large.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-split_column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yskn67
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,13 @@ files:
|
|
58
58
|
- lib/embulk/filter/split_column.rb
|
59
59
|
- src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
|
60
60
|
- src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
|
61
|
-
- classpath/embulk-filter-split_column-0.1.2.jar
|
61
|
+
- src/test/resources/example.csv
|
62
|
+
- src/test/resources/example.yml
|
63
|
+
- src/test/resources/few.csv
|
64
|
+
- src/test/resources/few.yml
|
65
|
+
- src/test/resources/too_large.csv
|
66
|
+
- src/test/resources/too_large.yml
|
67
|
+
- classpath/embulk-filter-split_column-0.1.3.jar
|
62
68
|
homepage: https://github.com/yskn67/embulk-filter-split_column
|
63
69
|
licenses:
|
64
70
|
- MIT
|