embulk-filter-expand_json 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.0.6.jar +0 -0
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +9 -2
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +11 -1
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +165 -9
- metadata +3 -3
- data/classpath/embulk-filter-expand_json-0.0.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
|
4
|
+
data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
|
7
|
+
data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
0.0.6 (2016-03-17)
|
2
|
+
==================
|
3
|
+
- [Add] Support JSON type
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/11
|
5
|
+
- [Enhancement] Validate json_column_name
|
6
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/9
|
7
|
+
|
1
8
|
0.0.5 (2016-03-04)
|
2
9
|
==================
|
3
10
|
- [Fix] Avoid `NullPointerException` if a column specified as `json_column_name` option doesn't exist in input schema.
|
data/build.gradle
CHANGED
Binary file
|
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import org.embulk.config.Config;
|
5
5
|
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.ConfigException;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.config.Task;
|
8
9
|
import org.embulk.config.TaskSource;
|
@@ -13,6 +14,7 @@ import org.embulk.spi.FilterPlugin;
|
|
13
14
|
import org.embulk.spi.PageOutput;
|
14
15
|
import org.embulk.spi.Schema;
|
15
16
|
import org.embulk.spi.time.TimestampParser;
|
17
|
+
import org.embulk.spi.type.Types;
|
16
18
|
import org.slf4j.Logger;
|
17
19
|
|
18
20
|
import java.util.List;
|
@@ -47,8 +49,13 @@ public class ExpandJsonFilterPlugin
|
|
47
49
|
{
|
48
50
|
PluginTask task = config.loadConfig(PluginTask.class);
|
49
51
|
|
50
|
-
// check if
|
51
|
-
inputSchema.lookupColumn(task.getJsonColumnName());
|
52
|
+
// check if a column specified as json_column_name option exists or not
|
53
|
+
Column jsonColumn = inputSchema.lookupColumn(task.getJsonColumnName());
|
54
|
+
if (jsonColumn.getType() != Types.STRING && jsonColumn.getType() != Types.JSON) {
|
55
|
+
// throws ConfigException if the column is not string or json type.
|
56
|
+
throw new ConfigException(String.format("A column specified as json_column_name option must be string or json type: %s",
|
57
|
+
new Object[] {jsonColumn.toString()}));
|
58
|
+
}
|
52
59
|
|
53
60
|
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
54
61
|
control.run(task.dump(), outputSchema);
|
@@ -18,6 +18,7 @@ import org.embulk.spi.PageBuilder;
|
|
18
18
|
import org.embulk.spi.PageOutput;
|
19
19
|
import org.embulk.spi.PageReader;
|
20
20
|
import org.embulk.spi.Schema;
|
21
|
+
import org.embulk.spi.json.JsonParser;
|
21
22
|
import org.embulk.spi.time.TimestampParser;
|
22
23
|
import org.embulk.spi.type.Types;
|
23
24
|
import org.joda.time.DateTimeZone;
|
@@ -111,6 +112,7 @@ public class FilteredPageOutput
|
|
111
112
|
private final PageBuilder pageBuilder;
|
112
113
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
113
114
|
private final ParseContext parseContext;
|
115
|
+
private final JsonParser jsonParser = new JsonParser();
|
114
116
|
|
115
117
|
private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
|
116
118
|
Schema outputSchema)
|
@@ -264,7 +266,12 @@ public class FilteredPageOutput
|
|
264
266
|
if (pageReader.isNull(jsonColumn)) {
|
265
267
|
json = null;
|
266
268
|
}
|
267
|
-
else {
|
269
|
+
else if (jsonColumn.getType() == Types.JSON) {
|
270
|
+
// TODO could use Value object directly and optimize this code
|
271
|
+
String jsonObject = pageReader.getJson(jsonColumn).toJson();
|
272
|
+
json = parseContext.parse(jsonObject);
|
273
|
+
}
|
274
|
+
else { // Types.STRING
|
268
275
|
String jsonObject = pageReader.getString(jsonColumn);
|
269
276
|
json = parseContext.parse(jsonObject);
|
270
277
|
}
|
@@ -303,6 +310,9 @@ public class FilteredPageOutput
|
|
303
310
|
throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
|
304
311
|
}
|
305
312
|
}
|
313
|
+
else if (Types.JSON.equals(expandedJsonColumn.getColumn().getType())) {
|
314
|
+
pageBuilder.setJson(expandedJsonColumn.getColumn(), jsonParser.parse(finalValue));
|
315
|
+
}
|
306
316
|
}
|
307
317
|
}
|
308
318
|
|
@@ -20,15 +20,24 @@ import org.embulk.spi.PageTestUtils;
|
|
20
20
|
import org.embulk.spi.Schema;
|
21
21
|
import org.embulk.spi.SchemaConfigException;
|
22
22
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
23
|
+
import org.embulk.spi.type.Type;
|
23
24
|
import org.junit.Before;
|
24
25
|
import org.junit.Rule;
|
25
26
|
import org.junit.Test;
|
26
27
|
import org.junit.rules.ExpectedException;
|
28
|
+
import org.msgpack.value.Value;
|
27
29
|
|
28
30
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
|
29
31
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
|
30
32
|
import static org.embulk.spi.type.Types.*;
|
31
33
|
import static org.junit.Assert.assertEquals;
|
34
|
+
import static org.msgpack.value.ValueFactory.newArray;
|
35
|
+
import static org.msgpack.value.ValueFactory.newBoolean;
|
36
|
+
import static org.msgpack.value.ValueFactory.newFloat;
|
37
|
+
import static org.msgpack.value.ValueFactory.newInteger;
|
38
|
+
import static org.msgpack.value.ValueFactory.newMap;
|
39
|
+
import static org.msgpack.value.ValueFactory.newMapBuilder;
|
40
|
+
import static org.msgpack.value.ValueFactory.newString;
|
32
41
|
|
33
42
|
public class TestExpandJsonFilterPlugin
|
34
43
|
{
|
@@ -39,16 +48,16 @@ public class TestExpandJsonFilterPlugin
|
|
39
48
|
public ExpectedException exception = ExpectedException.none();
|
40
49
|
|
41
50
|
|
42
|
-
private final Schema schema = Schema.builder()
|
43
|
-
.add("_c0", STRING)
|
44
|
-
.add("_c1", STRING)
|
45
|
-
.build();
|
46
51
|
private final String c1Data = "_c1_data";
|
52
|
+
// schema object is recreated per test method. Since each test method might require different schema,
|
53
|
+
// it's better that this field can be overwritten by each method.
|
54
|
+
private Schema schema;
|
47
55
|
private ExpandJsonFilterPlugin expandJsonFilterPlugin;
|
48
56
|
|
49
57
|
@Before
|
50
58
|
public void createResources()
|
51
59
|
{
|
60
|
+
schema = schema("_c0", STRING, "_c1", STRING); // default schema
|
52
61
|
expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
|
53
62
|
}
|
54
63
|
|
@@ -112,6 +121,27 @@ public class TestExpandJsonFilterPlugin
|
|
112
121
|
});
|
113
122
|
}
|
114
123
|
|
124
|
+
@Test
|
125
|
+
public void testThrowExceptionInvalidJsonColumnType()
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: expand_json\n" +
|
129
|
+
"json_column_name: _c2\n" +
|
130
|
+
"expanded_columns:\n" +
|
131
|
+
" - {name: _c1, type: string}";
|
132
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
133
|
+
schema = schema("_c0", STRING, "_c1", STRING, "_c2", LONG);
|
134
|
+
|
135
|
+
exception.expect(ConfigException.class);
|
136
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control() {
|
137
|
+
@Override
|
138
|
+
public void run(TaskSource taskSource, Schema schema)
|
139
|
+
{
|
140
|
+
// do nothing
|
141
|
+
}
|
142
|
+
});
|
143
|
+
}
|
144
|
+
|
115
145
|
@Test
|
116
146
|
public void testThrowExceptionAbsentExpandedColumns()
|
117
147
|
{
|
@@ -136,7 +166,8 @@ public class TestExpandJsonFilterPlugin
|
|
136
166
|
" - {name: _j2, type: long}\n" +
|
137
167
|
" - {name: _j3, type: timestamp}\n" +
|
138
168
|
" - {name: _j4, type: double}\n" +
|
139
|
-
" - {name: _j5, type: string}\n"
|
169
|
+
" - {name: _j5, type: string}\n" +
|
170
|
+
" - {name: _j6, type: json}\n";
|
140
171
|
|
141
172
|
ConfigSource config = getConfigFromYaml(configYaml);
|
142
173
|
PluginTask task = config.loadConfig(PluginTask.class);
|
@@ -163,6 +194,7 @@ public class TestExpandJsonFilterPlugin
|
|
163
194
|
" - {name: _j3, type: timestamp}\n" +
|
164
195
|
" - {name: _j4, type: double}\n" +
|
165
196
|
" - {name: _j5, type: string}\n" +
|
197
|
+
" - {name: _j6, type: json}\n" +
|
166
198
|
" - {name: _c0, type: string}\n";
|
167
199
|
|
168
200
|
ConfigSource config = getConfigFromYaml(configYaml);
|
@@ -172,15 +204,16 @@ public class TestExpandJsonFilterPlugin
|
|
172
204
|
@Override
|
173
205
|
public void run(TaskSource taskSource, Schema outputSchema)
|
174
206
|
{
|
175
|
-
assertEquals(7, outputSchema.getColumnCount());
|
207
|
+
assertEquals(8, outputSchema.getColumnCount());
|
176
208
|
|
177
209
|
Column new_j1 = outputSchema.getColumn(0);
|
178
210
|
Column new_j2 = outputSchema.getColumn(1);
|
179
211
|
Column new_j3 = outputSchema.getColumn(2);
|
180
212
|
Column new_j4 = outputSchema.getColumn(3);
|
181
213
|
Column new_j5 = outputSchema.getColumn(4);
|
182
|
-
Column new_c0 = outputSchema.getColumn(5);
|
183
|
-
Column old_c1 = outputSchema.getColumn(6);
|
214
|
+
Column new_j6 = outputSchema.getColumn(5);
|
215
|
+
Column new_c0 = outputSchema.getColumn(6);
|
216
|
+
Column old_c1 = outputSchema.getColumn(7);
|
184
217
|
|
185
218
|
assertEquals("_j1", new_j1.getName());
|
186
219
|
assertEquals(BOOLEAN, new_j1.getType());
|
@@ -192,17 +225,131 @@ public class TestExpandJsonFilterPlugin
|
|
192
225
|
assertEquals(DOUBLE, new_j4.getType());
|
193
226
|
assertEquals("_j5", new_j5.getName());
|
194
227
|
assertEquals(STRING, new_j5.getType());
|
228
|
+
assertEquals("_j6", new_j6.getName());
|
229
|
+
assertEquals(JSON, new_j6.getType());
|
195
230
|
assertEquals("_c0", new_c0.getName());
|
196
231
|
assertEquals(STRING, new_c0.getType());
|
197
232
|
assertEquals("_c1", old_c1.getName());
|
198
233
|
assertEquals(STRING, old_c1.getType());
|
234
|
+
}
|
235
|
+
});
|
236
|
+
}
|
199
237
|
|
238
|
+
@Test
|
239
|
+
public void testExpandJsonValuesFromJson()
|
240
|
+
{
|
241
|
+
String configYaml = "" +
|
242
|
+
"type: expand_json\n" +
|
243
|
+
"json_column_name: _c0\n" +
|
244
|
+
"root: $.\n" +
|
245
|
+
"time_zone: Asia/Tokyo\n" +
|
246
|
+
"expanded_columns:\n" +
|
247
|
+
" - {name: _j0, type: boolean}\n" +
|
248
|
+
" - {name: _j1, type: long}\n" +
|
249
|
+
" - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
|
250
|
+
" - {name: _j3, type: double}\n" +
|
251
|
+
" - {name: _j4, type: string}\n" +
|
252
|
+
" - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
|
253
|
+
" - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
|
254
|
+
// JsonPath: https://github.com/jayway/JsonPath
|
255
|
+
" - {name: '_j7.store.book[*].author', type: string}\n" +
|
256
|
+
" - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
|
257
|
+
" - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
|
258
|
+
" - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
|
259
|
+
" - {name: '_j7.store.book[2].author', type: string}\n" +
|
260
|
+
" - {name: _c0, type: string}\n";
|
261
|
+
|
262
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
263
|
+
final Schema schema = schema("_c0", JSON, "_c1", STRING);
|
264
|
+
|
265
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
266
|
+
{
|
267
|
+
@Override
|
268
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
269
|
+
{
|
270
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
271
|
+
Value data = newMapBuilder()
|
272
|
+
.put(s("_j0"), b(true))
|
273
|
+
.put(s("_j1"), i(2))
|
274
|
+
.put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
|
275
|
+
.put(s("_j3"), f(4.4))
|
276
|
+
.put(s("_j4"), s("v5"))
|
277
|
+
.put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
|
278
|
+
.put(s("_j6"), s("2014-10-21 04:44:33"))
|
279
|
+
.put(s("_j7"), newMapBuilder()
|
280
|
+
.put(s("store"), newMapBuilder()
|
281
|
+
.put(s("book"), newArray(
|
282
|
+
newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
|
283
|
+
newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
|
284
|
+
newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
|
285
|
+
newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
|
286
|
+
))
|
287
|
+
.put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
|
288
|
+
.build())
|
289
|
+
.put(s("expensive"), i(10))
|
290
|
+
.build())
|
291
|
+
.put(s("_c0"), s("v12"))
|
292
|
+
.build();
|
293
|
+
|
294
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
295
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
|
296
|
+
pageOutput.add(page);
|
297
|
+
}
|
298
|
+
|
299
|
+
pageOutput.finish();
|
300
|
+
}
|
301
|
+
|
302
|
+
PageReader pageReader = new PageReader(outputSchema);
|
303
|
+
|
304
|
+
for (Page page : mockPageOutput.pages) {
|
305
|
+
pageReader.setPage(page);
|
306
|
+
assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
|
307
|
+
assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
|
308
|
+
assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
|
309
|
+
assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
|
310
|
+
assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
|
311
|
+
assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
|
312
|
+
assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
|
313
|
+
assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
|
314
|
+
pageReader.getString(outputSchema.getColumn(7)));
|
315
|
+
assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
|
316
|
+
assertEquals("" +
|
317
|
+
"[" +
|
318
|
+
"{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
|
319
|
+
"{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
|
320
|
+
"]",
|
321
|
+
pageReader.getString(outputSchema.getColumn(9)));
|
322
|
+
assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
|
323
|
+
assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
|
324
|
+
assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
|
325
|
+
assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
|
326
|
+
}
|
200
327
|
}
|
201
328
|
});
|
202
329
|
}
|
203
330
|
|
331
|
+
private static Value s(String value)
|
332
|
+
{
|
333
|
+
return newString(value);
|
334
|
+
}
|
335
|
+
|
336
|
+
private static Value i(int value)
|
337
|
+
{
|
338
|
+
return newInteger(value);
|
339
|
+
}
|
340
|
+
|
341
|
+
private static Value f(double value)
|
342
|
+
{
|
343
|
+
return newFloat(value);
|
344
|
+
}
|
345
|
+
|
346
|
+
private static Value b(boolean value)
|
347
|
+
{
|
348
|
+
return newBoolean(value);
|
349
|
+
}
|
350
|
+
|
204
351
|
@Test
|
205
|
-
public void
|
352
|
+
public void testExpandJsonValuesFromString()
|
206
353
|
{
|
207
354
|
String configYaml = "" +
|
208
355
|
"type: expand_json\n" +
|
@@ -429,4 +576,13 @@ public class TestExpandJsonFilterPlugin
|
|
429
576
|
}
|
430
577
|
});
|
431
578
|
}
|
579
|
+
|
580
|
+
private static Schema schema(Object... nameAndTypes)
|
581
|
+
{
|
582
|
+
Schema.Builder builder = Schema.builder();
|
583
|
+
for (int i = 0; i < nameAndTypes.length; i += 2) {
|
584
|
+
builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
|
585
|
+
}
|
586
|
+
return builder.build();
|
587
|
+
}
|
432
588
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.0.5.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.0.6.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|
Binary file
|