embulk-filter-json_csv2arrayofobjects 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ package org.embulk.filter.json_csv2arrayofobjects;
2
+
3
+ import com.google.common.base.Optional;
4
+
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.spi.Column;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Page;
15
+ import org.embulk.spi.PageBuilder;
16
+ import org.embulk.spi.PageOutput;
17
+ import org.embulk.spi.PageReader;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.type.Type;
20
+
21
+ import java.util.List;
22
+
23
+ public class JsonCsv2arrayofobjectsFilterPlugin
24
+ implements FilterPlugin
25
+ {
26
+ public interface PluginTask
27
+ extends Task
28
+ {
29
+ @Config("column")
30
+ public String getColumn();
31
+
32
+ @Config("key")
33
+ public String getKey();
34
+
35
+ @Config("delimiter")
36
+ @ConfigDefault("\",\"")
37
+ public Optional<String> getDelimiter();
38
+
39
+ @Config("sub_delimiter")
40
+ @ConfigDefault("\"-\"")
41
+ public Optional<String> getSubDelimiter();
42
+
43
+ @Config("sequence_name")
44
+ @ConfigDefault("null")
45
+ public Optional<String> getSequenceName();
46
+
47
+ @Config("output_keys")
48
+ public List<JsonKeyTask> getOutputKeys();
49
+ }
50
+
51
+ public interface JsonKeyTask
52
+ extends Task
53
+ {
54
+ @Config("name")
55
+ public String getName();
56
+
57
+ @Config("type")
58
+ public Type getType();
59
+ }
60
+
61
+ public void validate(PluginTask task, Schema inputSchema)
62
+ {
63
+ // throws exception when the column does not exist
64
+ Column column = inputSchema.lookupColumn(task.getColumn());
65
+ Type colType = column.getType();
66
+ // delimiter and sub_delimtier should not be equal
67
+ String delimiter = task.getDelimiter().get();
68
+ String subDelimiter = task.getSubDelimiter().get();
69
+ if (delimiter.equals(subDelimiter)) {
70
+ String errMsg = "delimiter and sub_delimiter should not be equal";
71
+ throw new ConfigException(errMsg);
72
+ }
73
+ }
74
+
75
+ @Override
76
+ public void transaction(ConfigSource config, Schema inputSchema,
77
+ FilterPlugin.Control control)
78
+ {
79
+ PluginTask task = config.loadConfig(PluginTask.class);
80
+ validate(task, inputSchema);
81
+ Schema outputSchema = inputSchema;
82
+ control.run(task.dump(), outputSchema);
83
+ }
84
+
85
+ @Override
86
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
87
+ Schema outputSchema, PageOutput output)
88
+ {
89
+ PluginTask task = taskSource.loadTask(PluginTask.class);
90
+ PageBuilder pageBuilder = new PageBuilder(
91
+ Exec.getBufferAllocator(), outputSchema, output);
92
+ PageReader pageReader = new PageReader(inputSchema);
93
+ Filter filter = new Filter(task);
94
+ ColumnVisitorImpl visitor = new ColumnVisitorImpl(
95
+ pageReader, pageBuilder, filter, task);
96
+ return new PageOutputImpl(
97
+ pageReader, pageBuilder, outputSchema, visitor);
98
+ }
99
+
100
+ public static class PageOutputImpl implements PageOutput
101
+ {
102
+ private PageReader pageReader;
103
+ private PageBuilder pageBuilder;
104
+ private Schema outputSchema;
105
+ private ColumnVisitorImpl visitor;
106
+
107
+ PageOutputImpl(PageReader pageReader, PageBuilder pageBuilder, Schema outputSchema, ColumnVisitorImpl visitor)
108
+ {
109
+ this.pageReader = pageReader;
110
+ this.pageBuilder = pageBuilder;
111
+ this.outputSchema = outputSchema;
112
+ this.visitor = visitor;
113
+ }
114
+
115
+ @Override
116
+ public void add(Page page)
117
+ {
118
+ pageReader.setPage(page);
119
+ while (pageReader.nextRecord()) {
120
+ outputSchema.visitColumns(visitor);
121
+ pageBuilder.addRecord();
122
+ }
123
+ }
124
+
125
+ @Override
126
+ public void finish()
127
+ {
128
+ pageBuilder.finish();
129
+ }
130
+
131
+ @Override
132
+ public void close()
133
+ {
134
+ pageBuilder.close();
135
+ }
136
+ };
137
+ }
@@ -0,0 +1,101 @@
1
+ package org.embulk.filter.json_csv2arrayofobjects;
2
+
3
+ import com.google.common.collect.ImmutableSet;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.spi.DataException;
6
+ import org.embulk.spi.type.BooleanType;
7
+ import org.embulk.spi.type.DoubleType;
8
+ import org.embulk.spi.type.JsonType;
9
+ import org.embulk.spi.type.LongType;
10
+ import org.embulk.spi.type.StringType;
11
+ import org.embulk.spi.type.TimestampType;
12
+ import org.embulk.spi.type.Type;
13
+
14
+ public class StringCast
15
+ {
16
+ public static final ImmutableSet<String> TRUE_STRINGS =
17
+ ImmutableSet.of(
18
+ "true", "True", "TRUE",
19
+ "yes", "Yes", "YES",
20
+ "t", "T", "y", "Y",
21
+ "on", "On", "ON",
22
+ "1");
23
+
24
+ public static final ImmutableSet<String> FALSE_STRINGS =
25
+ ImmutableSet.of(
26
+ "false", "False", "FALSE",
27
+ "no", "No", "NO",
28
+ "f", "F", "n", "N",
29
+ "off", "Off", "OFF",
30
+ "0");
31
+
32
+ private StringCast() {}
33
+
34
+ public static String buildErrorMessage(String as, String value)
35
+ {
36
+ return String.format("Cannot cast String to %s: \"%s\"", as, value);
37
+ }
38
+
39
+ public static boolean asBoolean(String value)
40
+ {
41
+ if (TRUE_STRINGS.contains(value)) {
42
+ return true;
43
+ }
44
+ else if (FALSE_STRINGS.contains(value)) {
45
+ return false;
46
+ }
47
+ else {
48
+ throw new DataException(buildErrorMessage("boolean", value));
49
+ }
50
+ }
51
+
52
+ public static double asDouble(String value)
53
+ {
54
+ try {
55
+ return Double.parseDouble(value);
56
+ }
57
+ catch (NumberFormatException ex) {
58
+ throw new DataException(buildErrorMessage("double", value), ex);
59
+ }
60
+ }
61
+
62
+ public static long asLong(String value)
63
+ {
64
+ try {
65
+ return Long.parseLong(value);
66
+ }
67
+ catch (NumberFormatException ex) {
68
+ throw new DataException(buildErrorMessage("long", value), ex);
69
+ }
70
+ }
71
+
72
+ public static String asString(String value)
73
+ {
74
+ return value;
75
+ }
76
+
77
+ public static Object cast(String value, Type outputType)
78
+ {
79
+ if (outputType instanceof BooleanType) {
80
+ return asBoolean(value);
81
+ }
82
+ else if (outputType instanceof DoubleType) {
83
+ return asDouble(value);
84
+ }
85
+ else if (outputType instanceof LongType) {
86
+ return asLong(value);
87
+ }
88
+ else if (outputType instanceof StringType) {
89
+ return asString(value);
90
+ }
91
+ else if (outputType instanceof JsonType) {
92
+ throw new ConfigException("Casting to json is not supported.");
93
+ }
94
+ else if (outputType instanceof TimestampType) {
95
+ throw new ConfigException("Casting to timestamp is not supported.");
96
+ }
97
+ else {
98
+ throw new ConfigException(String.format("Invalid type: %s", outputType));
99
+ }
100
+ }
101
+ }
@@ -0,0 +1,238 @@
1
+ package org.embulk.filter.json_csv2arrayofobjects;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.filter.json_csv2arrayofobjects.JsonCsv2arrayofobjectsFilterPlugin.PluginTask;
5
+ import org.embulk.spi.DataException;
6
+ import org.junit.Rule;
7
+ import org.junit.Test;
8
+
9
+ import static org.embulk.filter.json_csv2arrayofobjects.TestJsonCsv2arrayofobjectsFilterPlugin.taskFromYamlString;
10
+ import static org.junit.Assert.assertEquals;
11
+
12
+ public class TestFilter
13
+ {
14
+ @Rule
15
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
16
+
17
+ @Test
18
+ public void oneOutputKey()
19
+ {
20
+ PluginTask task = taskFromYamlString(
21
+ "type: json_csv2arrayofobjects",
22
+ "column: json_payload",
23
+ "key: key",
24
+ "output_keys:",
25
+ " - {name: name, type: string}"
26
+ );
27
+ Filter filter = new Filter(task);
28
+ String inputValue = null;
29
+ String got = null;
30
+ String expected = null;
31
+ // element size > 1
32
+ inputValue = "{\"key\": \"a,b,c\"}";
33
+ got = filter.doFilter(inputValue);
34
+ expected = "{\"key\":[{\"name\":\"a\"},{\"name\":\"b\"},{\"name\":\"c\"}]}";
35
+ assertEquals(expected, got);
36
+ // element size = 1
37
+ inputValue = "{\"key\": \"a\"}";
38
+ got = filter.doFilter(inputValue);
39
+ expected = "{\"key\":[{\"name\":\"a\"}]}";
40
+ assertEquals(expected, got);
41
+ }
42
+
43
+ @Test
44
+ public void multipleOutputKeys()
45
+ {
46
+ PluginTask task = taskFromYamlString(
47
+ "type: json_csv2arrayofobjects",
48
+ "column: json_payload",
49
+ "key: key",
50
+ "output_keys:",
51
+ " - {name: name, type: string}",
52
+ " - {name: number, type: long}"
53
+ );
54
+ Filter filter = new Filter(task);
55
+ String inputValue = null;
56
+ String got = null;
57
+ String expected = null;
58
+ // element size > 1
59
+ inputValue = "{\"key\": \"a-1,b-2,c-3\"}";
60
+ got = filter.doFilter(inputValue);
61
+ expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"},{\"number\":3,\"name\":\"c\"}]}";
62
+ assertEquals(expected, got);
63
+ // element size = 1
64
+ inputValue = "{\"key\": \"a-1\"}";
65
+ got = filter.doFilter(inputValue);
66
+ expected = "{\"key\":[{\"number\":1,\"name\":\"a\"}]}";
67
+ assertEquals(expected, got);
68
+ }
69
+
70
+ @Test
71
+ public void nestedJson()
72
+ {
73
+ PluginTask task = taskFromYamlString(
74
+ "type: json_csv2arrayofobjects",
75
+ "column: json_payload",
76
+ "key: key1.key2",
77
+ "output_keys:",
78
+ " - {name: name, type: string}",
79
+ " - {name: number, type: double}"
80
+ );
81
+ Filter filter = new Filter(task);
82
+ String inputValue = "{\"key1\": {\"key2\": \"a-1.5\"}}";
83
+ String got = filter.doFilter(inputValue);
84
+ String expected = "{\"key1\":{\"key2\":[{\"number\":1.5,\"name\":\"a\"}]}}";
85
+ assertEquals(expected, got);
86
+ }
87
+
88
+ @Test
89
+ public void valueIsNull()
90
+ {
91
+ PluginTask task = taskFromYamlString(
92
+ "type: json_csv2arrayofobjects",
93
+ "column: json_payload",
94
+ "key: key",
95
+ "output_keys:",
96
+ " - {name: name, type: string}",
97
+ " - {name: number, type: double}"
98
+ );
99
+ Filter filter = new Filter(task);
100
+ String inputValue = "{\"key\": null}";
101
+ String got = filter.doFilter(inputValue);
102
+ String expected = "{\"key\":[]}";
103
+ assertEquals(expected, got);
104
+ }
105
+
106
+ @Test
107
+ public void skipEmptyElement()
108
+ {
109
+ PluginTask task = taskFromYamlString(
110
+ "type: json_csv2arrayofobjects",
111
+ "column: json_payload",
112
+ "key: key",
113
+ "output_keys:",
114
+ " - {name: name, type: string}",
115
+ " - {name: number, type: long}"
116
+ );
117
+ Filter filter = new Filter(task);
118
+ String inputValue = null;
119
+ String got = null;
120
+ String expected = null;
121
+ // value is empty
122
+ inputValue = "{\"key\": \"\"}";
123
+ got = filter.doFilter(inputValue);
124
+ expected = "{\"key\":[]}";
125
+ assertEquals(expected, got);
126
+ // value contains empty element
127
+ inputValue = "{\"key\": \",a-1,,,b-2,\"}";
128
+ got = filter.doFilter(inputValue);
129
+ expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"}]}";
130
+ assertEquals(expected, got);
131
+ }
132
+
133
+ @Test
134
+ public void explicitDelimiter()
135
+ {
136
+ PluginTask task = taskFromYamlString(
137
+ "type: json_csv2arrayofobjects",
138
+ "column: json_payload",
139
+ "key: key",
140
+ "delimiter: \":\"",
141
+ "output_keys:",
142
+ " - {name: name, type: string}",
143
+ " - {name: tf, type: boolean}"
144
+ );
145
+ Filter filter = new Filter(task);
146
+ String inputValue = "{\"key\": \"a-t:b-f\"}";
147
+ String got = filter.doFilter(inputValue);
148
+ String expected = "{\"key\":[{\"tf\":true,\"name\":\"a\"},{\"tf\":false,\"name\":\"b\"}]}";
149
+ assertEquals(expected, got);
150
+ }
151
+
152
+ @Test
153
+ public void explicitSubDelimiter()
154
+ {
155
+ PluginTask task = taskFromYamlString(
156
+ "type: json_csv2arrayofobjects",
157
+ "column: json_payload",
158
+ "key: key",
159
+ "sub_delimiter: \"_\"",
160
+ "output_keys:",
161
+ " - {name: name, type: string}",
162
+ " - {name: number, type: long}"
163
+ );
164
+ Filter filter = new Filter(task);
165
+ String inputValue = "{\"key\": \"a_1,b_2\"}";
166
+ String got = filter.doFilter(inputValue);
167
+ String expected = "{\"key\":[{\"number\":1,\"name\":\"a\"},{\"number\":2,\"name\":\"b\"}]}";
168
+ assertEquals(expected, got);
169
+ }
170
+
171
+ @Test
172
+ public void explicitSequenceName()
173
+ {
174
+ PluginTask task = taskFromYamlString(
175
+ "type: json_csv2arrayofobjects",
176
+ "column: json_payload",
177
+ "key: key",
178
+ "sequence_name: seq",
179
+ "output_keys:",
180
+ " - {name: name, type: string}",
181
+ " - {name: number, type: long}"
182
+ );
183
+ Filter filter = new Filter(task);
184
+ String inputValue = "{\"key\": \"a-1,b-2\"}";
185
+ filter.doFilter(inputValue);
186
+ String got = filter.doFilter(inputValue);
187
+ String expected = "{\"key\":[{\"number\":1,\"name\":\"a\",\"seq\":0},{\"number\":2,\"name\":\"b\",\"seq\":1}]}";
188
+ assertEquals(expected, got);
189
+ }
190
+
191
+ @Test(expected = DataException.class)
192
+ public void inputValueIsNull()
193
+ {
194
+ PluginTask task = taskFromYamlString(
195
+ "type: json_csv2arrayofobjects",
196
+ "column: json_payload",
197
+ "key: key",
198
+ "output_keys:",
199
+ " - {name: name, type: string}",
200
+ " - {name: number, type: double}"
201
+ );
202
+ Filter filter = new Filter(task);
203
+ String inputValue = null;
204
+ filter.doFilter(inputValue);
205
+ }
206
+
207
+ @Test(expected = DataException.class)
208
+ public void inputValueIsEmpty()
209
+ {
210
+ PluginTask task = taskFromYamlString(
211
+ "type: json_csv2arrayofobjects",
212
+ "column: json_payload",
213
+ "key: key",
214
+ "output_keys:",
215
+ " - {name: name, type: string}",
216
+ " - {name: number, type: double}"
217
+ );
218
+ Filter filter = new Filter(task);
219
+ String inputValue = null;
220
+ filter.doFilter(inputValue);
221
+ }
222
+
223
+ @Test(expected = DataException.class)
224
+ public void invalidKey()
225
+ {
226
+ PluginTask task = taskFromYamlString(
227
+ "type: json_csv2arrayofobjects",
228
+ "column: json_payload",
229
+ "key: key1.key2",
230
+ "output_keys:",
231
+ " - {name: name, type: string}",
232
+ " - {name: number, type: long}"
233
+ );
234
+ Filter filter = new Filter(task);
235
+ String inputValue = "{\"key\": \"a-1,b-2,c-3\"}";
236
+ filter.doFilter(inputValue);
237
+ }
238
+ }