embulk-filter-split_column 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java +8 -5
- data/src/test/resources/example.csv +5 -0
- data/src/test/resources/example.yml +26 -0
- data/src/test/resources/few.csv +3 -0
- data/src/test/resources/few.yml +26 -0
- data/src/test/resources/too_large.csv +3 -0
- data/src/test/resources/too_large.yml +26 -0
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53a619abdc297f96cee13340709ceee54c3ce911
|
4
|
+
data.tar.gz: b92e6154f1481d505e5d03d850b1041ccc846dd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8be926fd3253406702e7816b802d7a56af41259f5dbd35e50f9fe9934fa9a659311418459fc733d2b5e925425d9233e8104a96b691fd536a12bf663407d73669
|
7
|
+
data.tar.gz: 59f4001fa6fffaa405eeb2f1d8b4d1bce44af286c6d8f59a34a05638f0edcd94254c8a1efdaffa0d2af5511de8e23870faba411646a3ab47347ff08318982b63
|
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -107,23 +107,26 @@ public class SplitColumnFilterPlugin
|
|
107
107
|
int rowNum = 0;
|
108
108
|
while (reader.nextRecord()) {
|
109
109
|
rowNum++;
|
110
|
-
String
|
110
|
+
String targetColumnValue = reader.getString(targetColumn);
|
111
|
+
String[] words = StringUtils.split(targetColumnValue, task.getDelimiter());
|
111
112
|
SchemaConfig outputSchemaConfig = task.getOutputColumns();
|
112
113
|
// check split values
|
113
114
|
if (outputSchemaConfig.size() != words.length) {
|
114
115
|
Boolean isSkip = task.getIsSkip().get();
|
115
116
|
if (isSkip.booleanValue()) {
|
116
|
-
String message = String.format("Skipped line %d:
|
117
|
+
String message = String.format("Skipped line %d: output_column has %d columns but value was separated in %d: \"%s\"",
|
117
118
|
rowNum,
|
118
119
|
outputSchemaConfig.size(),
|
119
|
-
words.length
|
120
|
+
words.length,
|
121
|
+
targetColumnValue
|
120
122
|
);
|
121
123
|
log.warn(message);
|
122
124
|
continue;
|
123
125
|
} else {
|
124
|
-
String message = String.format("
|
126
|
+
String message = String.format("output_column has %d columns but value was separated in %d: \"%s\"",
|
125
127
|
outputSchemaConfig.size(),
|
126
|
-
words.length
|
128
|
+
words.length,
|
129
|
+
targetColumnValue
|
127
130
|
);
|
128
131
|
throw new SplitColumnValidateException(message);
|
129
132
|
}
|
@@ -0,0 +1,5 @@
|
|
1
|
+
id,account,time,purchase,comment
|
2
|
+
1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
|
3
|
+
2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
|
4
|
+
3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
|
5
|
+
4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/few.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
@@ -0,0 +1,26 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/too_large.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
skip_header_lines: 1
|
8
|
+
columns:
|
9
|
+
- {name: id, type: long}
|
10
|
+
- {name: account, type: long}
|
11
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
12
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
13
|
+
- {name: comment, type: string}
|
14
|
+
filters:
|
15
|
+
- type: split_column
|
16
|
+
delimiter: '|'
|
17
|
+
is_skip: true
|
18
|
+
target_key: comment
|
19
|
+
output_columns:
|
20
|
+
- {name: alph, type: string}
|
21
|
+
- {name: num, type: long}
|
22
|
+
- {name: dbl, type: double}
|
23
|
+
- {name: bool, type: boolean}
|
24
|
+
- {name: ts, type: timestamp, format: '%Y-%m-%d'}
|
25
|
+
out:
|
26
|
+
type: stdout
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-split_column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yskn67
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,13 @@ files:
|
|
58
58
|
- lib/embulk/filter/split_column.rb
|
59
59
|
- src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
|
60
60
|
- src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
|
61
|
-
-
|
61
|
+
- src/test/resources/example.csv
|
62
|
+
- src/test/resources/example.yml
|
63
|
+
- src/test/resources/few.csv
|
64
|
+
- src/test/resources/few.yml
|
65
|
+
- src/test/resources/too_large.csv
|
66
|
+
- src/test/resources/too_large.yml
|
67
|
+
- classpath/embulk-filter-split_column-0.1.3.jar
|
62
68
|
homepage: https://github.com/yskn67/embulk-filter-split_column
|
63
69
|
licenses:
|
64
70
|
- MIT
|