embulk-filter-json_csv2arrayofobjects 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +91 -0
- data/build.gradle +98 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/example.tsv +2 -0
- data/example/example.yml +31 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +169 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/filter/json_csv2arrayofobjects.rb +3 -0
- data/src/main/java/org/embulk/filter/json_csv2arrayofobjects/ColumnVisitorImpl.java +118 -0
- data/src/main/java/org/embulk/filter/json_csv2arrayofobjects/Filter.java +68 -0
- data/src/main/java/org/embulk/filter/json_csv2arrayofobjects/JsonCsv2arrayofobjectsFilterPlugin.java +137 -0
- data/src/main/java/org/embulk/filter/json_csv2arrayofobjects/StringCast.java +101 -0
- data/src/test/java/org/embulk/filter/json_csv2arrayofobjects/TestFilter.java +238 -0
- data/src/test/java/org/embulk/filter/json_csv2arrayofobjects/TestJsonCsv2arrayofobjectsFilterPlugin.java +67 -0
- data/src/test/java/org/embulk/filter/json_csv2arrayofobjects/TestStringCast.java +99 -0
- metadata +98 -0
data/src/main/java/org/embulk/filter/json_csv2arrayofobjects/JsonCsv2arrayofobjectsFilterPlugin.java
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
package org.embulk.filter.json_csv2arrayofobjects;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.spi.Column;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.FilterPlugin;
|
14
|
+
import org.embulk.spi.Page;
|
15
|
+
import org.embulk.spi.PageBuilder;
|
16
|
+
import org.embulk.spi.PageOutput;
|
17
|
+
import org.embulk.spi.PageReader;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.type.Type;
|
20
|
+
|
21
|
+
import java.util.List;
|
22
|
+
|
23
|
+
public class JsonCsv2arrayofobjectsFilterPlugin
|
24
|
+
implements FilterPlugin
|
25
|
+
{
|
26
|
+
public interface PluginTask
|
27
|
+
extends Task
|
28
|
+
{
|
29
|
+
@Config("column")
|
30
|
+
public String getColumn();
|
31
|
+
|
32
|
+
@Config("key")
|
33
|
+
public String getKey();
|
34
|
+
|
35
|
+
@Config("delimiter")
|
36
|
+
@ConfigDefault("\",\"")
|
37
|
+
public Optional<String> getDelimiter();
|
38
|
+
|
39
|
+
@Config("sub_delimiter")
|
40
|
+
@ConfigDefault("\"-\"")
|
41
|
+
public Optional<String> getSubDelimiter();
|
42
|
+
|
43
|
+
@Config("sequence_name")
|
44
|
+
@ConfigDefault("null")
|
45
|
+
public Optional<String> getSequenceName();
|
46
|
+
|
47
|
+
@Config("output_keys")
|
48
|
+
public List<JsonKeyTask> getOutputKeys();
|
49
|
+
}
|
50
|
+
|
51
|
+
public interface JsonKeyTask
|
52
|
+
extends Task
|
53
|
+
{
|
54
|
+
@Config("name")
|
55
|
+
public String getName();
|
56
|
+
|
57
|
+
@Config("type")
|
58
|
+
public Type getType();
|
59
|
+
}
|
60
|
+
|
61
|
+
public void validate(PluginTask task, Schema inputSchema)
|
62
|
+
{
|
63
|
+
// throws exception when the column does not exist
|
64
|
+
Column column = inputSchema.lookupColumn(task.getColumn());
|
65
|
+
Type colType = column.getType();
|
66
|
+
// delimiter and sub_delimtier should not be equal
|
67
|
+
String delimiter = task.getDelimiter().get();
|
68
|
+
String subDelimiter = task.getSubDelimiter().get();
|
69
|
+
if (delimiter.equals(subDelimiter)) {
|
70
|
+
String errMsg = "delimiter and sub_delimiter should not be equal";
|
71
|
+
throw new ConfigException(errMsg);
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
@Override
|
76
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
77
|
+
FilterPlugin.Control control)
|
78
|
+
{
|
79
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
80
|
+
validate(task, inputSchema);
|
81
|
+
Schema outputSchema = inputSchema;
|
82
|
+
control.run(task.dump(), outputSchema);
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
87
|
+
Schema outputSchema, PageOutput output)
|
88
|
+
{
|
89
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
90
|
+
PageBuilder pageBuilder = new PageBuilder(
|
91
|
+
Exec.getBufferAllocator(), outputSchema, output);
|
92
|
+
PageReader pageReader = new PageReader(inputSchema);
|
93
|
+
Filter filter = new Filter(task);
|
94
|
+
ColumnVisitorImpl visitor = new ColumnVisitorImpl(
|
95
|
+
pageReader, pageBuilder, filter, task);
|
96
|
+
return new PageOutputImpl(
|
97
|
+
pageReader, pageBuilder, outputSchema, visitor);
|
98
|
+
}
|
99
|
+
|
100
|
+
public static class PageOutputImpl implements PageOutput
|
101
|
+
{
|
102
|
+
private PageReader pageReader;
|
103
|
+
private PageBuilder pageBuilder;
|
104
|
+
private Schema outputSchema;
|
105
|
+
private ColumnVisitorImpl visitor;
|
106
|
+
|
107
|
+
PageOutputImpl(PageReader pageReader, PageBuilder pageBuilder, Schema outputSchema, ColumnVisitorImpl visitor)
|
108
|
+
{
|
109
|
+
this.pageReader = pageReader;
|
110
|
+
this.pageBuilder = pageBuilder;
|
111
|
+
this.outputSchema = outputSchema;
|
112
|
+
this.visitor = visitor;
|
113
|
+
}
|
114
|
+
|
115
|
+
@Override
|
116
|
+
public void add(Page page)
|
117
|
+
{
|
118
|
+
pageReader.setPage(page);
|
119
|
+
while (pageReader.nextRecord()) {
|
120
|
+
outputSchema.visitColumns(visitor);
|
121
|
+
pageBuilder.addRecord();
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public void finish()
|
127
|
+
{
|
128
|
+
pageBuilder.finish();
|
129
|
+
}
|
130
|
+
|
131
|
+
@Override
|
132
|
+
public void close()
|
133
|
+
{
|
134
|
+
pageBuilder.close();
|
135
|
+
}
|
136
|
+
};
|
137
|
+
}
|
@@ -0,0 +1,101 @@
|
|
1
|
+
package org.embulk.filter.json_csv2arrayofobjects;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableSet;
|
4
|
+
import org.embulk.config.ConfigException;
|
5
|
+
import org.embulk.spi.DataException;
|
6
|
+
import org.embulk.spi.type.BooleanType;
|
7
|
+
import org.embulk.spi.type.DoubleType;
|
8
|
+
import org.embulk.spi.type.JsonType;
|
9
|
+
import org.embulk.spi.type.LongType;
|
10
|
+
import org.embulk.spi.type.StringType;
|
11
|
+
import org.embulk.spi.type.TimestampType;
|
12
|
+
import org.embulk.spi.type.Type;
|
13
|
+
|
14
|
+
public class StringCast
|
15
|
+
{
|
16
|
+
public static final ImmutableSet<String> TRUE_STRINGS =
|
17
|
+
ImmutableSet.of(
|
18
|
+
"true", "True", "TRUE",
|
19
|
+
"yes", "Yes", "YES",
|
20
|
+
"t", "T", "y", "Y",
|
21
|
+
"on", "On", "ON",
|
22
|
+
"1");
|
23
|
+
|
24
|
+
public static final ImmutableSet<String> FALSE_STRINGS =
|
25
|
+
ImmutableSet.of(
|
26
|
+
"false", "False", "FALSE",
|
27
|
+
"no", "No", "NO",
|
28
|
+
"f", "F", "n", "N",
|
29
|
+
"off", "Off", "OFF",
|
30
|
+
"0");
|
31
|
+
|
32
|
+
private StringCast() {}
|
33
|
+
|
34
|
+
public static String buildErrorMessage(String as, String value)
|
35
|
+
{
|
36
|
+
return String.format("Cannot cast String to %s: \"%s\"", as, value);
|
37
|
+
}
|
38
|
+
|
39
|
+
public static boolean asBoolean(String value)
|
40
|
+
{
|
41
|
+
if (TRUE_STRINGS.contains(value)) {
|
42
|
+
return true;
|
43
|
+
}
|
44
|
+
else if (FALSE_STRINGS.contains(value)) {
|
45
|
+
return false;
|
46
|
+
}
|
47
|
+
else {
|
48
|
+
throw new DataException(buildErrorMessage("boolean", value));
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
public static double asDouble(String value)
|
53
|
+
{
|
54
|
+
try {
|
55
|
+
return Double.parseDouble(value);
|
56
|
+
}
|
57
|
+
catch (NumberFormatException ex) {
|
58
|
+
throw new DataException(buildErrorMessage("double", value), ex);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
|
62
|
+
public static long asLong(String value)
|
63
|
+
{
|
64
|
+
try {
|
65
|
+
return Long.parseLong(value);
|
66
|
+
}
|
67
|
+
catch (NumberFormatException ex) {
|
68
|
+
throw new DataException(buildErrorMessage("long", value), ex);
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
public static String asString(String value)
|
73
|
+
{
|
74
|
+
return value;
|
75
|
+
}
|
76
|
+
|
77
|
+
public static Object cast(String value, Type outputType)
|
78
|
+
{
|
79
|
+
if (outputType instanceof BooleanType) {
|
80
|
+
return asBoolean(value);
|
81
|
+
}
|
82
|
+
else if (outputType instanceof DoubleType) {
|
83
|
+
return asDouble(value);
|
84
|
+
}
|
85
|
+
else if (outputType instanceof LongType) {
|
86
|
+
return asLong(value);
|
87
|
+
}
|
88
|
+
else if (outputType instanceof StringType) {
|
89
|
+
return asString(value);
|
90
|
+
}
|
91
|
+
else if (outputType instanceof JsonType) {
|
92
|
+
throw new ConfigException("Casting to json is not supported.");
|
93
|
+
}
|
94
|
+
else if (outputType instanceof TimestampType) {
|
95
|
+
throw new ConfigException("Casting to timestamp is not supported.");
|
96
|
+
}
|
97
|
+
else {
|
98
|
+
throw new ConfigException(String.format("Invalid type: %s", outputType));
|
99
|
+
}
|
100
|
+
}
|
101
|
+
}
|
@@ -0,0 +1,238 @@
|
|
1
|
+
package org.embulk.filter.json_csv2arrayofobjects;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
import org.embulk.filter.json_csv2arrayofobjects.JsonCsv2arrayofobjectsFilterPlugin.PluginTask;
|
5
|
+
import org.embulk.spi.DataException;
|
6
|
+
import org.junit.Rule;
|
7
|
+
import org.junit.Test;
|
8
|
+
|
9
|
+
import static org.embulk.filter.json_csv2arrayofobjects.TestJsonCsv2arrayofobjectsFilterPlugin.taskFromYamlString;
|
10
|
+
import static org.junit.Assert.assertEquals;
|
11
|
+
|
12
|
+
public class TestFilter
|
13
|
+
{
|
14
|
+
@Rule
|
15
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
16
|
+
|
17
|
+
@Test
|
18
|
+
public void oneOutputKey()
|
19
|
+
{
|
20
|
+
PluginTask task = taskFromYamlString(
|
21
|
+
"type: json_csv2arrayofobjects",
|
22
|
+
"column: json_payload",
|
23
|
+
"key: key",
|
24
|
+
"output_keys:",
|
25
|
+
" - {name: name, type: string}"
|
26
|
+
);
|
27
|
+
Filter filter = new Filter(task);
|
28
|
+
String inputValue = null;
|
29
|
+
String got = null;
|
30
|
+
String expected = null;
|
31
|
+
// element size > 1
|
32
|
+
inputValue = "{\"key\": \"a,b,c\"}";
|
33
|
+
got = filter.doFilter(inputValue);
|
34
|
+
expected = "{\"key\":[{\"name\":\"a\"},{\"name\":\"b\"},{\"name\":\"c\"}]}";
|
35
|
+
assertEquals(expected, got);
|
36
|
+
// element size = 1
|
37
|
+
inputValue = "{\"key\": \"a\"}";
|
38
|
+
got = filter.doFilter(inputValue);
|
39
|
+
expected = "{\"key\":[{\"name\":\"a\"}]}";
|
40
|
+
assertEquals(expected, got);
|
41
|
+
}
|
42
|
+
|
43
|
+
@Test
|
44
|
+
public void multipleOutputKeys()
|
45
|
+
{
|
46
|
+
PluginTask task = taskFromYamlString(
|
47
|
+
"type: json_csv2arrayofobjects",
|
48
|
+
"column: json_payload",
|
49
|
+
"key: key",
|
50
|
+
"output_keys:",
|
51
|
+
" - {name: name, type: string}",
|
52
|
+
" - {name: number, type: long}"
|
53
|
+
);
|
54
|
+
Filter filter = new Filter(task);
|
55
|
+
String inputValue = null;
|
56
|
+
String got = null;
|
57
|
+
String expected = null;
|
58
|
+
// element size > 1
|
59
|
+
inputValue = "{\"key\": \"a-1,b-2,c-3\"}";
|
60
|
+
got = filter.doFilter(inputValue);
|
61
|
+
expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"},{\"number\":3,\"name\":\"c\"}]}";
|
62
|
+
assertEquals(expected, got);
|
63
|
+
// element size = 1
|
64
|
+
inputValue = "{\"key\": \"a-1\"}";
|
65
|
+
got = filter.doFilter(inputValue);
|
66
|
+
expected = "{\"key\":[{\"number\":1,\"name\":\"a\"}]}";
|
67
|
+
assertEquals(expected, got);
|
68
|
+
}
|
69
|
+
|
70
|
+
@Test
|
71
|
+
public void nestedJson()
|
72
|
+
{
|
73
|
+
PluginTask task = taskFromYamlString(
|
74
|
+
"type: json_csv2arrayofobjects",
|
75
|
+
"column: json_payload",
|
76
|
+
"key: key1.key2",
|
77
|
+
"output_keys:",
|
78
|
+
" - {name: name, type: string}",
|
79
|
+
" - {name: number, type: double}"
|
80
|
+
);
|
81
|
+
Filter filter = new Filter(task);
|
82
|
+
String inputValue = "{\"key1\": {\"key2\": \"a-1.5\"}}";
|
83
|
+
String got = filter.doFilter(inputValue);
|
84
|
+
String expected = "{\"key1\":{\"key2\":[{\"number\":1.5,\"name\":\"a\"}]}}";
|
85
|
+
assertEquals(expected, got);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Test
|
89
|
+
public void valueIsNull()
|
90
|
+
{
|
91
|
+
PluginTask task = taskFromYamlString(
|
92
|
+
"type: json_csv2arrayofobjects",
|
93
|
+
"column: json_payload",
|
94
|
+
"key: key",
|
95
|
+
"output_keys:",
|
96
|
+
" - {name: name, type: string}",
|
97
|
+
" - {name: number, type: double}"
|
98
|
+
);
|
99
|
+
Filter filter = new Filter(task);
|
100
|
+
String inputValue = "{\"key\": null}";
|
101
|
+
String got = filter.doFilter(inputValue);
|
102
|
+
String expected = "{\"key\":[]}";
|
103
|
+
assertEquals(expected, got);
|
104
|
+
}
|
105
|
+
|
106
|
+
@Test
|
107
|
+
public void skipEmptyElement()
|
108
|
+
{
|
109
|
+
PluginTask task = taskFromYamlString(
|
110
|
+
"type: json_csv2arrayofobjects",
|
111
|
+
"column: json_payload",
|
112
|
+
"key: key",
|
113
|
+
"output_keys:",
|
114
|
+
" - {name: name, type: string}",
|
115
|
+
" - {name: number, type: long}"
|
116
|
+
);
|
117
|
+
Filter filter = new Filter(task);
|
118
|
+
String inputValue = null;
|
119
|
+
String got = null;
|
120
|
+
String expected = null;
|
121
|
+
// value is empty
|
122
|
+
inputValue = "{\"key\": \"\"}";
|
123
|
+
got = filter.doFilter(inputValue);
|
124
|
+
expected = "{\"key\":[]}";
|
125
|
+
assertEquals(expected, got);
|
126
|
+
// value contains empty element
|
127
|
+
inputValue = "{\"key\": \",a-1,,,b-2,\"}";
|
128
|
+
got = filter.doFilter(inputValue);
|
129
|
+
expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"}]}";
|
130
|
+
assertEquals(expected, got);
|
131
|
+
}
|
132
|
+
|
133
|
+
@Test
|
134
|
+
public void explicitDelimiter()
|
135
|
+
{
|
136
|
+
PluginTask task = taskFromYamlString(
|
137
|
+
"type: json_csv2arrayofobjects",
|
138
|
+
"column: json_payload",
|
139
|
+
"key: key",
|
140
|
+
"delimiter: \":\"",
|
141
|
+
"output_keys:",
|
142
|
+
" - {name: name, type: string}",
|
143
|
+
" - {name: tf, type: boolean}"
|
144
|
+
);
|
145
|
+
Filter filter = new Filter(task);
|
146
|
+
String inputValue = "{\"key\": \"a-t:b-f\"}";
|
147
|
+
String got = filter.doFilter(inputValue);
|
148
|
+
String expected = "{\"key\":[{\"tf\":true,\"name\":\"a\"},{\"tf\":false,\"name\":\"b\"}]}";
|
149
|
+
assertEquals(expected, got);
|
150
|
+
}
|
151
|
+
|
152
|
+
@Test
|
153
|
+
public void explicitSubDelimiter()
|
154
|
+
{
|
155
|
+
PluginTask task = taskFromYamlString(
|
156
|
+
"type: json_csv2arrayofobjects",
|
157
|
+
"column: json_payload",
|
158
|
+
"key: key",
|
159
|
+
"sub_delimiter: \"_\"",
|
160
|
+
"output_keys:",
|
161
|
+
" - {name: name, type: string}",
|
162
|
+
" - {name: number, type: long}"
|
163
|
+
);
|
164
|
+
Filter filter = new Filter(task);
|
165
|
+
String inputValue = "{\"key\": \"a_1,b_2\"}";
|
166
|
+
String got = filter.doFilter(inputValue);
|
167
|
+
String expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"}]}";
|
168
|
+
assertEquals(expected, got);
|
169
|
+
}
|
170
|
+
|
171
|
+
@Test
|
172
|
+
public void explicitSequenceName()
|
173
|
+
{
|
174
|
+
PluginTask task = taskFromYamlString(
|
175
|
+
"type: json_csv2arrayofobjects",
|
176
|
+
"column: json_payload",
|
177
|
+
"key: key",
|
178
|
+
"sequence_name: seq",
|
179
|
+
"output_keys:",
|
180
|
+
" - {name: name, type: string}",
|
181
|
+
" - {name: number, type: long}"
|
182
|
+
);
|
183
|
+
Filter filter = new Filter(task);
|
184
|
+
String inputValue = "{\"key\": \"a-1,b-2\"}";
|
185
|
+
filter.doFilter(inputValue);
|
186
|
+
String got = filter.doFilter(inputValue);
|
187
|
+
String expected = "{\"key\":[{\"number\":1,\"name\":\"a\",\"seq\":0},{\"number\":2,\"name\":\"b\",\"seq\":1}]}";
|
188
|
+
assertEquals(expected, got);
|
189
|
+
}
|
190
|
+
|
191
|
+
@Test(expected = DataException.class)
|
192
|
+
public void inputValueIsNull()
|
193
|
+
{
|
194
|
+
PluginTask task = taskFromYamlString(
|
195
|
+
"type: json_csv2arrayofobjects",
|
196
|
+
"column: json_payload",
|
197
|
+
"key: key",
|
198
|
+
"output_keys:",
|
199
|
+
" - {name: name, type: string}",
|
200
|
+
" - {name: number, type: double}"
|
201
|
+
);
|
202
|
+
Filter filter = new Filter(task);
|
203
|
+
String inputValue = null;
|
204
|
+
filter.doFilter(inputValue);
|
205
|
+
}
|
206
|
+
|
207
|
+
@Test(expected = DataException.class)
|
208
|
+
public void inputValueIsEmpty()
|
209
|
+
{
|
210
|
+
PluginTask task = taskFromYamlString(
|
211
|
+
"type: json_csv2arrayofobjects",
|
212
|
+
"column: json_payload",
|
213
|
+
"key: key",
|
214
|
+
"output_keys:",
|
215
|
+
" - {name: name, type: string}",
|
216
|
+
" - {name: number, type: double}"
|
217
|
+
);
|
218
|
+
Filter filter = new Filter(task);
|
219
|
+
String inputValue = null;
|
220
|
+
filter.doFilter(inputValue);
|
221
|
+
}
|
222
|
+
|
223
|
+
@Test(expected = DataException.class)
|
224
|
+
public void invalidKey()
|
225
|
+
{
|
226
|
+
PluginTask task = taskFromYamlString(
|
227
|
+
"type: json_csv2arrayofobjects",
|
228
|
+
"column: json_payload",
|
229
|
+
"key: key1.key2",
|
230
|
+
"output_keys:",
|
231
|
+
" - {name: name, type: string}",
|
232
|
+
" - {name: number, type: long}"
|
233
|
+
);
|
234
|
+
Filter filter = new Filter(task);
|
235
|
+
String inputValue = "{\"key\": \"a-1,b-2,c-3\"}";
|
236
|
+
filter.doFilter(inputValue);
|
237
|
+
}
|
238
|
+
}
|