embulk-filter-expand_json 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07943e1a2bf6a447bccb93cbd9ec0412f00382f0
4
- data.tar.gz: 0051fdf7de1702ddd169afb6d9a4fe9ce53e0ded
3
+ metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
4
+ data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
5
5
  SHA512:
6
- metadata.gz: 860428fccd8b863e1070ef0e23f657c88e5312031aa222588e51b20c7caa394b2425f26dfc3322fdf69dbc0d3984ff524e3969666cb9a53c68672ead92fd8c25
7
- data.tar.gz: d862d2cd98f348242f78a04d5cc2d4117cd4d3923428e181a40b778b509aaf37630abe97158a0f1a8d9351bffcd990aa8675ee5076f522575d86972c8f4bf6d2
6
+ metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
7
+ data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ 0.0.6 (2016-03-17)
2
+ ==================
3
+ - [Add] Support JSON type
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/11
5
+ - [Enhancement] Validate json_column_name
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/9
7
+
1
8
  0.0.5 (2016-03-04)
2
9
  ==================
3
10
  - [Fix] Avoid `NullPointerException` if a column specified as `json_column_name` option doesn't exist in input schema.
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.0.5"
18
+ version = "0.0.6"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
3
3
  import com.google.common.collect.ImmutableList;
4
4
  import org.embulk.config.Config;
5
5
  import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.ConfigException;
6
7
  import org.embulk.config.ConfigSource;
7
8
  import org.embulk.config.Task;
8
9
  import org.embulk.config.TaskSource;
@@ -13,6 +14,7 @@ import org.embulk.spi.FilterPlugin;
13
14
  import org.embulk.spi.PageOutput;
14
15
  import org.embulk.spi.Schema;
15
16
  import org.embulk.spi.time.TimestampParser;
17
+ import org.embulk.spi.type.Types;
16
18
  import org.slf4j.Logger;
17
19
 
18
20
  import java.util.List;
@@ -47,8 +49,13 @@ public class ExpandJsonFilterPlugin
47
49
  {
48
50
  PluginTask task = config.loadConfig(PluginTask.class);
49
51
 
50
- // check if the specified json column exists or not
51
- inputSchema.lookupColumn(task.getJsonColumnName());
52
+ // check if a column specified as json_column_name option exists or not
53
+ Column jsonColumn = inputSchema.lookupColumn(task.getJsonColumnName());
54
+ if (jsonColumn.getType() != Types.STRING && jsonColumn.getType() != Types.JSON) {
55
+ // throws ConfigException if the column is not string or json type.
56
+ throw new ConfigException(String.format("A column specified as json_column_name option must be string or json type: %s",
57
+ new Object[] {jsonColumn.toString()}));
58
+ }
52
59
 
53
60
  Schema outputSchema = buildOutputSchema(task, inputSchema);
54
61
  control.run(task.dump(), outputSchema);
@@ -18,6 +18,7 @@ import org.embulk.spi.PageBuilder;
18
18
  import org.embulk.spi.PageOutput;
19
19
  import org.embulk.spi.PageReader;
20
20
  import org.embulk.spi.Schema;
21
+ import org.embulk.spi.json.JsonParser;
21
22
  import org.embulk.spi.time.TimestampParser;
22
23
  import org.embulk.spi.type.Types;
23
24
  import org.joda.time.DateTimeZone;
@@ -111,6 +112,7 @@ public class FilteredPageOutput
111
112
  private final PageBuilder pageBuilder;
112
113
  private final ObjectMapper objectMapper = new ObjectMapper();
113
114
  private final ParseContext parseContext;
115
+ private final JsonParser jsonParser = new JsonParser();
114
116
 
115
117
  private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
116
118
  Schema outputSchema)
@@ -264,7 +266,12 @@ public class FilteredPageOutput
264
266
  if (pageReader.isNull(jsonColumn)) {
265
267
  json = null;
266
268
  }
267
- else {
269
+ else if (jsonColumn.getType() == Types.JSON) {
270
+ // TODO could use Value object directly and optimize this code
271
+ String jsonObject = pageReader.getJson(jsonColumn).toJson();
272
+ json = parseContext.parse(jsonObject);
273
+ }
274
+ else { // Types.STRING
268
275
  String jsonObject = pageReader.getString(jsonColumn);
269
276
  json = parseContext.parse(jsonObject);
270
277
  }
@@ -303,6 +310,9 @@ public class FilteredPageOutput
303
310
  throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
304
311
  }
305
312
  }
313
+ else if (Types.JSON.equals(expandedJsonColumn.getColumn().getType())) {
314
+ pageBuilder.setJson(expandedJsonColumn.getColumn(), jsonParser.parse(finalValue));
315
+ }
306
316
  }
307
317
  }
308
318
 
@@ -20,15 +20,24 @@ import org.embulk.spi.PageTestUtils;
20
20
  import org.embulk.spi.Schema;
21
21
  import org.embulk.spi.SchemaConfigException;
22
22
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
23
+ import org.embulk.spi.type.Type;
23
24
  import org.junit.Before;
24
25
  import org.junit.Rule;
25
26
  import org.junit.Test;
26
27
  import org.junit.rules.ExpectedException;
28
+ import org.msgpack.value.Value;
27
29
 
28
30
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
29
31
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
30
32
  import static org.embulk.spi.type.Types.*;
31
33
  import static org.junit.Assert.assertEquals;
34
+ import static org.msgpack.value.ValueFactory.newArray;
35
+ import static org.msgpack.value.ValueFactory.newBoolean;
36
+ import static org.msgpack.value.ValueFactory.newFloat;
37
+ import static org.msgpack.value.ValueFactory.newInteger;
38
+ import static org.msgpack.value.ValueFactory.newMap;
39
+ import static org.msgpack.value.ValueFactory.newMapBuilder;
40
+ import static org.msgpack.value.ValueFactory.newString;
32
41
 
33
42
  public class TestExpandJsonFilterPlugin
34
43
  {
@@ -39,16 +48,16 @@ public class TestExpandJsonFilterPlugin
39
48
  public ExpectedException exception = ExpectedException.none();
40
49
 
41
50
 
42
- private final Schema schema = Schema.builder()
43
- .add("_c0", STRING)
44
- .add("_c1", STRING)
45
- .build();
46
51
  private final String c1Data = "_c1_data";
52
+ // schema object is recreated per test method. Since each test method might require different schema,
53
+ // it's better that this field can be overwritten by each method.
54
+ private Schema schema;
47
55
  private ExpandJsonFilterPlugin expandJsonFilterPlugin;
48
56
 
49
57
  @Before
50
58
  public void createResources()
51
59
  {
60
+ schema = schema("_c0", STRING, "_c1", STRING); // default schema
52
61
  expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
53
62
  }
54
63
 
@@ -112,6 +121,27 @@ public class TestExpandJsonFilterPlugin
112
121
  });
113
122
  }
114
123
 
124
+ @Test
125
+ public void testThrowExceptionInvalidJsonColumnType()
126
+ {
127
+ String configYaml = "" +
128
+ "type: expand_json\n" +
129
+ "json_column_name: _c2\n" +
130
+ "expanded_columns:\n" +
131
+ " - {name: _c1, type: string}";
132
+ ConfigSource config = getConfigFromYaml(configYaml);
133
+ schema = schema("_c0", STRING, "_c1", STRING, "_c2", LONG);
134
+
135
+ exception.expect(ConfigException.class);
136
+ expandJsonFilterPlugin.transaction(config, schema, new Control() {
137
+ @Override
138
+ public void run(TaskSource taskSource, Schema schema)
139
+ {
140
+ // do nothing
141
+ }
142
+ });
143
+ }
144
+
115
145
  @Test
116
146
  public void testThrowExceptionAbsentExpandedColumns()
117
147
  {
@@ -136,7 +166,8 @@ public class TestExpandJsonFilterPlugin
136
166
  " - {name: _j2, type: long}\n" +
137
167
  " - {name: _j3, type: timestamp}\n" +
138
168
  " - {name: _j4, type: double}\n" +
139
- " - {name: _j5, type: string}\n";
169
+ " - {name: _j5, type: string}\n" +
170
+ " - {name: _j6, type: json}\n";
140
171
 
141
172
  ConfigSource config = getConfigFromYaml(configYaml);
142
173
  PluginTask task = config.loadConfig(PluginTask.class);
@@ -163,6 +194,7 @@ public class TestExpandJsonFilterPlugin
163
194
  " - {name: _j3, type: timestamp}\n" +
164
195
  " - {name: _j4, type: double}\n" +
165
196
  " - {name: _j5, type: string}\n" +
197
+ " - {name: _j6, type: json}\n" +
166
198
  " - {name: _c0, type: string}\n";
167
199
 
168
200
  ConfigSource config = getConfigFromYaml(configYaml);
@@ -172,15 +204,16 @@ public class TestExpandJsonFilterPlugin
172
204
  @Override
173
205
  public void run(TaskSource taskSource, Schema outputSchema)
174
206
  {
175
- assertEquals(7, outputSchema.getColumnCount());
207
+ assertEquals(8, outputSchema.getColumnCount());
176
208
 
177
209
  Column new_j1 = outputSchema.getColumn(0);
178
210
  Column new_j2 = outputSchema.getColumn(1);
179
211
  Column new_j3 = outputSchema.getColumn(2);
180
212
  Column new_j4 = outputSchema.getColumn(3);
181
213
  Column new_j5 = outputSchema.getColumn(4);
182
- Column new_c0 = outputSchema.getColumn(5);
183
- Column old_c1 = outputSchema.getColumn(6);
214
+ Column new_j6 = outputSchema.getColumn(5);
215
+ Column new_c0 = outputSchema.getColumn(6);
216
+ Column old_c1 = outputSchema.getColumn(7);
184
217
 
185
218
  assertEquals("_j1", new_j1.getName());
186
219
  assertEquals(BOOLEAN, new_j1.getType());
@@ -192,17 +225,131 @@ public class TestExpandJsonFilterPlugin
192
225
  assertEquals(DOUBLE, new_j4.getType());
193
226
  assertEquals("_j5", new_j5.getName());
194
227
  assertEquals(STRING, new_j5.getType());
228
+ assertEquals("_j6", new_j6.getName());
229
+ assertEquals(JSON, new_j6.getType());
195
230
  assertEquals("_c0", new_c0.getName());
196
231
  assertEquals(STRING, new_c0.getType());
197
232
  assertEquals("_c1", old_c1.getName());
198
233
  assertEquals(STRING, old_c1.getType());
234
+ }
235
+ });
236
+ }
199
237
 
238
+ @Test
239
+ public void testExpandJsonValuesFromJson()
240
+ {
241
+ String configYaml = "" +
242
+ "type: expand_json\n" +
243
+ "json_column_name: _c0\n" +
244
+ "root: $.\n" +
245
+ "time_zone: Asia/Tokyo\n" +
246
+ "expanded_columns:\n" +
247
+ " - {name: _j0, type: boolean}\n" +
248
+ " - {name: _j1, type: long}\n" +
249
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
250
+ " - {name: _j3, type: double}\n" +
251
+ " - {name: _j4, type: string}\n" +
252
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
253
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
254
+ // JsonPath: https://github.com/jayway/JsonPath
255
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
256
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
257
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
258
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
259
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
260
+ " - {name: _c0, type: string}\n";
261
+
262
+ ConfigSource config = getConfigFromYaml(configYaml);
263
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
264
+
265
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
266
+ {
267
+ @Override
268
+ public void run(TaskSource taskSource, Schema outputSchema)
269
+ {
270
+ MockPageOutput mockPageOutput = new MockPageOutput();
271
+ Value data = newMapBuilder()
272
+ .put(s("_j0"), b(true))
273
+ .put(s("_j1"), i(2))
274
+ .put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
275
+ .put(s("_j3"), f(4.4))
276
+ .put(s("_j4"), s("v5"))
277
+ .put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
278
+ .put(s("_j6"), s("2014-10-21 04:44:33"))
279
+ .put(s("_j7"), newMapBuilder()
280
+ .put(s("store"), newMapBuilder()
281
+ .put(s("book"), newArray(
282
+ newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
283
+ newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
284
+ newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
285
+ newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
286
+ ))
287
+ .put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
288
+ .build())
289
+ .put(s("expensive"), i(10))
290
+ .build())
291
+ .put(s("_c0"), s("v12"))
292
+ .build();
293
+
294
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
295
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
296
+ pageOutput.add(page);
297
+ }
298
+
299
+ pageOutput.finish();
300
+ }
301
+
302
+ PageReader pageReader = new PageReader(outputSchema);
303
+
304
+ for (Page page : mockPageOutput.pages) {
305
+ pageReader.setPage(page);
306
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
307
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
308
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
309
+ assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
310
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
311
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
312
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
313
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
314
+ pageReader.getString(outputSchema.getColumn(7)));
315
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
316
+ assertEquals("" +
317
+ "[" +
318
+ "{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
319
+ "{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
320
+ "]",
321
+ pageReader.getString(outputSchema.getColumn(9)));
322
+ assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
323
+ assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
324
+ assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
325
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
326
+ }
200
327
  }
201
328
  });
202
329
  }
203
330
 
331
+ private static Value s(String value)
332
+ {
333
+ return newString(value);
334
+ }
335
+
336
+ private static Value i(int value)
337
+ {
338
+ return newInteger(value);
339
+ }
340
+
341
+ private static Value f(double value)
342
+ {
343
+ return newFloat(value);
344
+ }
345
+
346
+ private static Value b(boolean value)
347
+ {
348
+ return newBoolean(value);
349
+ }
350
+
204
351
  @Test
205
- public void testExpandJsonValues()
352
+ public void testExpandJsonValuesFromString()
206
353
  {
207
354
  String configYaml = "" +
208
355
  "type: expand_json\n" +
@@ -429,4 +576,13 @@ public class TestExpandJsonFilterPlugin
429
576
  }
430
577
  });
431
578
  }
579
+
580
+ private static Schema schema(Object... nameAndTypes)
581
+ {
582
+ Schema.Builder builder = Schema.builder();
583
+ for (int i = 0; i < nameAndTypes.length; i += 2) {
584
+ builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
585
+ }
586
+ return builder.build();
587
+ }
432
588
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-04 00:00:00.000000000 Z
11
+ date: 2016-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.0.5.jar
68
+ - classpath/embulk-filter-expand_json-0.0.6.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar