embulk-filter-expand_json 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.0.6.jar +0 -0
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +9 -2
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +11 -1
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +165 -9
- metadata +3 -3
- data/classpath/embulk-filter-expand_json-0.0.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
|
4
|
+
data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
|
7
|
+
data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
0.0.6 (2016-03-17)
|
2
|
+
==================
|
3
|
+
- [Add] Support JSON type
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/11
|
5
|
+
- [Enhancement] Validate json_column_name
|
6
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/9
|
7
|
+
|
1
8
|
0.0.5 (2016-03-04)
|
2
9
|
==================
|
3
10
|
- [Fix] Avoid `NullPointerException` if a column specified as `json_column_name` option doesn't exist in input schema.
|
data/build.gradle
CHANGED
Binary file
|
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import org.embulk.config.Config;
|
5
5
|
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.ConfigException;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.config.Task;
|
8
9
|
import org.embulk.config.TaskSource;
|
@@ -13,6 +14,7 @@ import org.embulk.spi.FilterPlugin;
|
|
13
14
|
import org.embulk.spi.PageOutput;
|
14
15
|
import org.embulk.spi.Schema;
|
15
16
|
import org.embulk.spi.time.TimestampParser;
|
17
|
+
import org.embulk.spi.type.Types;
|
16
18
|
import org.slf4j.Logger;
|
17
19
|
|
18
20
|
import java.util.List;
|
@@ -47,8 +49,13 @@ public class ExpandJsonFilterPlugin
|
|
47
49
|
{
|
48
50
|
PluginTask task = config.loadConfig(PluginTask.class);
|
49
51
|
|
50
|
-
// check if
|
51
|
-
inputSchema.lookupColumn(task.getJsonColumnName());
|
52
|
+
// check if a column specified as json_column_name option exists or not
|
53
|
+
Column jsonColumn = inputSchema.lookupColumn(task.getJsonColumnName());
|
54
|
+
if (jsonColumn.getType() != Types.STRING && jsonColumn.getType() != Types.JSON) {
|
55
|
+
// throws ConfigException if the column is not string or json type.
|
56
|
+
throw new ConfigException(String.format("A column specified as json_column_name option must be string or json type: %s",
|
57
|
+
new Object[] {jsonColumn.toString()}));
|
58
|
+
}
|
52
59
|
|
53
60
|
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
54
61
|
control.run(task.dump(), outputSchema);
|
@@ -18,6 +18,7 @@ import org.embulk.spi.PageBuilder;
|
|
18
18
|
import org.embulk.spi.PageOutput;
|
19
19
|
import org.embulk.spi.PageReader;
|
20
20
|
import org.embulk.spi.Schema;
|
21
|
+
import org.embulk.spi.json.JsonParser;
|
21
22
|
import org.embulk.spi.time.TimestampParser;
|
22
23
|
import org.embulk.spi.type.Types;
|
23
24
|
import org.joda.time.DateTimeZone;
|
@@ -111,6 +112,7 @@ public class FilteredPageOutput
|
|
111
112
|
private final PageBuilder pageBuilder;
|
112
113
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
113
114
|
private final ParseContext parseContext;
|
115
|
+
private final JsonParser jsonParser = new JsonParser();
|
114
116
|
|
115
117
|
private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
|
116
118
|
Schema outputSchema)
|
@@ -264,7 +266,12 @@ public class FilteredPageOutput
|
|
264
266
|
if (pageReader.isNull(jsonColumn)) {
|
265
267
|
json = null;
|
266
268
|
}
|
267
|
-
else {
|
269
|
+
else if (jsonColumn.getType() == Types.JSON) {
|
270
|
+
// TODO could use Value object directly and optimize this code
|
271
|
+
String jsonObject = pageReader.getJson(jsonColumn).toJson();
|
272
|
+
json = parseContext.parse(jsonObject);
|
273
|
+
}
|
274
|
+
else { // Types.STRING
|
268
275
|
String jsonObject = pageReader.getString(jsonColumn);
|
269
276
|
json = parseContext.parse(jsonObject);
|
270
277
|
}
|
@@ -303,6 +310,9 @@ public class FilteredPageOutput
|
|
303
310
|
throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
|
304
311
|
}
|
305
312
|
}
|
313
|
+
else if (Types.JSON.equals(expandedJsonColumn.getColumn().getType())) {
|
314
|
+
pageBuilder.setJson(expandedJsonColumn.getColumn(), jsonParser.parse(finalValue));
|
315
|
+
}
|
306
316
|
}
|
307
317
|
}
|
308
318
|
|
@@ -20,15 +20,24 @@ import org.embulk.spi.PageTestUtils;
|
|
20
20
|
import org.embulk.spi.Schema;
|
21
21
|
import org.embulk.spi.SchemaConfigException;
|
22
22
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
23
|
+
import org.embulk.spi.type.Type;
|
23
24
|
import org.junit.Before;
|
24
25
|
import org.junit.Rule;
|
25
26
|
import org.junit.Test;
|
26
27
|
import org.junit.rules.ExpectedException;
|
28
|
+
import org.msgpack.value.Value;
|
27
29
|
|
28
30
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
|
29
31
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
|
30
32
|
import static org.embulk.spi.type.Types.*;
|
31
33
|
import static org.junit.Assert.assertEquals;
|
34
|
+
import static org.msgpack.value.ValueFactory.newArray;
|
35
|
+
import static org.msgpack.value.ValueFactory.newBoolean;
|
36
|
+
import static org.msgpack.value.ValueFactory.newFloat;
|
37
|
+
import static org.msgpack.value.ValueFactory.newInteger;
|
38
|
+
import static org.msgpack.value.ValueFactory.newMap;
|
39
|
+
import static org.msgpack.value.ValueFactory.newMapBuilder;
|
40
|
+
import static org.msgpack.value.ValueFactory.newString;
|
32
41
|
|
33
42
|
public class TestExpandJsonFilterPlugin
|
34
43
|
{
|
@@ -39,16 +48,16 @@ public class TestExpandJsonFilterPlugin
|
|
39
48
|
public ExpectedException exception = ExpectedException.none();
|
40
49
|
|
41
50
|
|
42
|
-
private final Schema schema = Schema.builder()
|
43
|
-
.add("_c0", STRING)
|
44
|
-
.add("_c1", STRING)
|
45
|
-
.build();
|
46
51
|
private final String c1Data = "_c1_data";
|
52
|
+
// schema object is recreated per test method. Since each test method might require different schema,
|
53
|
+
// it's better that this field can be overwritten by each method.
|
54
|
+
private Schema schema;
|
47
55
|
private ExpandJsonFilterPlugin expandJsonFilterPlugin;
|
48
56
|
|
49
57
|
@Before
|
50
58
|
public void createResources()
|
51
59
|
{
|
60
|
+
schema = schema("_c0", STRING, "_c1", STRING); // default schema
|
52
61
|
expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
|
53
62
|
}
|
54
63
|
|
@@ -112,6 +121,27 @@ public class TestExpandJsonFilterPlugin
|
|
112
121
|
});
|
113
122
|
}
|
114
123
|
|
124
|
+
@Test
|
125
|
+
public void testThrowExceptionInvalidJsonColumnType()
|
126
|
+
{
|
127
|
+
String configYaml = "" +
|
128
|
+
"type: expand_json\n" +
|
129
|
+
"json_column_name: _c2\n" +
|
130
|
+
"expanded_columns:\n" +
|
131
|
+
" - {name: _c1, type: string}";
|
132
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
133
|
+
schema = schema("_c0", STRING, "_c1", STRING, "_c2", LONG);
|
134
|
+
|
135
|
+
exception.expect(ConfigException.class);
|
136
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control() {
|
137
|
+
@Override
|
138
|
+
public void run(TaskSource taskSource, Schema schema)
|
139
|
+
{
|
140
|
+
// do nothing
|
141
|
+
}
|
142
|
+
});
|
143
|
+
}
|
144
|
+
|
115
145
|
@Test
|
116
146
|
public void testThrowExceptionAbsentExpandedColumns()
|
117
147
|
{
|
@@ -136,7 +166,8 @@ public class TestExpandJsonFilterPlugin
|
|
136
166
|
" - {name: _j2, type: long}\n" +
|
137
167
|
" - {name: _j3, type: timestamp}\n" +
|
138
168
|
" - {name: _j4, type: double}\n" +
|
139
|
-
" - {name: _j5, type: string}\n"
|
169
|
+
" - {name: _j5, type: string}\n" +
|
170
|
+
" - {name: _j6, type: json}\n";
|
140
171
|
|
141
172
|
ConfigSource config = getConfigFromYaml(configYaml);
|
142
173
|
PluginTask task = config.loadConfig(PluginTask.class);
|
@@ -163,6 +194,7 @@ public class TestExpandJsonFilterPlugin
|
|
163
194
|
" - {name: _j3, type: timestamp}\n" +
|
164
195
|
" - {name: _j4, type: double}\n" +
|
165
196
|
" - {name: _j5, type: string}\n" +
|
197
|
+
" - {name: _j6, type: json}\n" +
|
166
198
|
" - {name: _c0, type: string}\n";
|
167
199
|
|
168
200
|
ConfigSource config = getConfigFromYaml(configYaml);
|
@@ -172,15 +204,16 @@ public class TestExpandJsonFilterPlugin
|
|
172
204
|
@Override
|
173
205
|
public void run(TaskSource taskSource, Schema outputSchema)
|
174
206
|
{
|
175
|
-
assertEquals(
|
207
|
+
assertEquals(8, outputSchema.getColumnCount());
|
176
208
|
|
177
209
|
Column new_j1 = outputSchema.getColumn(0);
|
178
210
|
Column new_j2 = outputSchema.getColumn(1);
|
179
211
|
Column new_j3 = outputSchema.getColumn(2);
|
180
212
|
Column new_j4 = outputSchema.getColumn(3);
|
181
213
|
Column new_j5 = outputSchema.getColumn(4);
|
182
|
-
Column
|
183
|
-
Column
|
214
|
+
Column new_j6 = outputSchema.getColumn(5);
|
215
|
+
Column new_c0 = outputSchema.getColumn(6);
|
216
|
+
Column old_c1 = outputSchema.getColumn(7);
|
184
217
|
|
185
218
|
assertEquals("_j1", new_j1.getName());
|
186
219
|
assertEquals(BOOLEAN, new_j1.getType());
|
@@ -192,17 +225,131 @@ public class TestExpandJsonFilterPlugin
|
|
192
225
|
assertEquals(DOUBLE, new_j4.getType());
|
193
226
|
assertEquals("_j5", new_j5.getName());
|
194
227
|
assertEquals(STRING, new_j5.getType());
|
228
|
+
assertEquals("_j6", new_j6.getName());
|
229
|
+
assertEquals(JSON, new_j6.getType());
|
195
230
|
assertEquals("_c0", new_c0.getName());
|
196
231
|
assertEquals(STRING, new_c0.getType());
|
197
232
|
assertEquals("_c1", old_c1.getName());
|
198
233
|
assertEquals(STRING, old_c1.getType());
|
234
|
+
}
|
235
|
+
});
|
236
|
+
}
|
199
237
|
|
238
|
+
@Test
|
239
|
+
public void testExpandJsonValuesFromJson()
|
240
|
+
{
|
241
|
+
String configYaml = "" +
|
242
|
+
"type: expand_json\n" +
|
243
|
+
"json_column_name: _c0\n" +
|
244
|
+
"root: $.\n" +
|
245
|
+
"time_zone: Asia/Tokyo\n" +
|
246
|
+
"expanded_columns:\n" +
|
247
|
+
" - {name: _j0, type: boolean}\n" +
|
248
|
+
" - {name: _j1, type: long}\n" +
|
249
|
+
" - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
|
250
|
+
" - {name: _j3, type: double}\n" +
|
251
|
+
" - {name: _j4, type: string}\n" +
|
252
|
+
" - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
|
253
|
+
" - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
|
254
|
+
// JsonPath: https://github.com/jayway/JsonPath
|
255
|
+
" - {name: '_j7.store.book[*].author', type: string}\n" +
|
256
|
+
" - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
|
257
|
+
" - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
|
258
|
+
" - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
|
259
|
+
" - {name: '_j7.store.book[2].author', type: string}\n" +
|
260
|
+
" - {name: _c0, type: string}\n";
|
261
|
+
|
262
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
263
|
+
final Schema schema = schema("_c0", JSON, "_c1", STRING);
|
264
|
+
|
265
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
266
|
+
{
|
267
|
+
@Override
|
268
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
269
|
+
{
|
270
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
271
|
+
Value data = newMapBuilder()
|
272
|
+
.put(s("_j0"), b(true))
|
273
|
+
.put(s("_j1"), i(2))
|
274
|
+
.put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
|
275
|
+
.put(s("_j3"), f(4.4))
|
276
|
+
.put(s("_j4"), s("v5"))
|
277
|
+
.put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
|
278
|
+
.put(s("_j6"), s("2014-10-21 04:44:33"))
|
279
|
+
.put(s("_j7"), newMapBuilder()
|
280
|
+
.put(s("store"), newMapBuilder()
|
281
|
+
.put(s("book"), newArray(
|
282
|
+
newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
|
283
|
+
newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
|
284
|
+
newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
|
285
|
+
newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
|
286
|
+
))
|
287
|
+
.put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
|
288
|
+
.build())
|
289
|
+
.put(s("expensive"), i(10))
|
290
|
+
.build())
|
291
|
+
.put(s("_c0"), s("v12"))
|
292
|
+
.build();
|
293
|
+
|
294
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
295
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
|
296
|
+
pageOutput.add(page);
|
297
|
+
}
|
298
|
+
|
299
|
+
pageOutput.finish();
|
300
|
+
}
|
301
|
+
|
302
|
+
PageReader pageReader = new PageReader(outputSchema);
|
303
|
+
|
304
|
+
for (Page page : mockPageOutput.pages) {
|
305
|
+
pageReader.setPage(page);
|
306
|
+
assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
|
307
|
+
assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
|
308
|
+
assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
|
309
|
+
assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
|
310
|
+
assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
|
311
|
+
assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
|
312
|
+
assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
|
313
|
+
assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
|
314
|
+
pageReader.getString(outputSchema.getColumn(7)));
|
315
|
+
assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
|
316
|
+
assertEquals("" +
|
317
|
+
"[" +
|
318
|
+
"{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
|
319
|
+
"{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
|
320
|
+
"]",
|
321
|
+
pageReader.getString(outputSchema.getColumn(9)));
|
322
|
+
assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
|
323
|
+
assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
|
324
|
+
assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
|
325
|
+
assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
|
326
|
+
}
|
200
327
|
}
|
201
328
|
});
|
202
329
|
}
|
203
330
|
|
331
|
+
private static Value s(String value)
|
332
|
+
{
|
333
|
+
return newString(value);
|
334
|
+
}
|
335
|
+
|
336
|
+
private static Value i(int value)
|
337
|
+
{
|
338
|
+
return newInteger(value);
|
339
|
+
}
|
340
|
+
|
341
|
+
private static Value f(double value)
|
342
|
+
{
|
343
|
+
return newFloat(value);
|
344
|
+
}
|
345
|
+
|
346
|
+
private static Value b(boolean value)
|
347
|
+
{
|
348
|
+
return newBoolean(value);
|
349
|
+
}
|
350
|
+
|
204
351
|
@Test
|
205
|
-
public void
|
352
|
+
public void testExpandJsonValuesFromString()
|
206
353
|
{
|
207
354
|
String configYaml = "" +
|
208
355
|
"type: expand_json\n" +
|
@@ -429,4 +576,13 @@ public class TestExpandJsonFilterPlugin
|
|
429
576
|
}
|
430
577
|
});
|
431
578
|
}
|
579
|
+
|
580
|
+
private static Schema schema(Object... nameAndTypes)
|
581
|
+
{
|
582
|
+
Schema.Builder builder = Schema.builder();
|
583
|
+
for (int i = 0; i < nameAndTypes.length; i += 2) {
|
584
|
+
builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
|
585
|
+
}
|
586
|
+
return builder.build();
|
587
|
+
}
|
432
588
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.0.5.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.0.6.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|
Binary file
|