embulk-filter-expand_json 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07943e1a2bf6a447bccb93cbd9ec0412f00382f0
4
- data.tar.gz: 0051fdf7de1702ddd169afb6d9a4fe9ce53e0ded
3
+ metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
4
+ data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
5
5
  SHA512:
6
- metadata.gz: 860428fccd8b863e1070ef0e23f657c88e5312031aa222588e51b20c7caa394b2425f26dfc3322fdf69dbc0d3984ff524e3969666cb9a53c68672ead92fd8c25
7
- data.tar.gz: d862d2cd98f348242f78a04d5cc2d4117cd4d3923428e181a40b778b509aaf37630abe97158a0f1a8d9351bffcd990aa8675ee5076f522575d86972c8f4bf6d2
6
+ metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
7
+ data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ 0.0.6 (2016-03-17)
2
+ ==================
3
+ - [Add] Support JSON type
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/11
5
+ - [Enhancement] Validate json_column_name
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/9
7
+
1
8
  0.0.5 (2016-03-04)
2
9
  ==================
3
10
  - [Fix] Avoid `NullPointerException` if a column specified as `json_column_name` option doesn't exist in input schema.
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.0.5"
18
+ version = "0.0.6"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
3
3
  import com.google.common.collect.ImmutableList;
4
4
  import org.embulk.config.Config;
5
5
  import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.ConfigException;
6
7
  import org.embulk.config.ConfigSource;
7
8
  import org.embulk.config.Task;
8
9
  import org.embulk.config.TaskSource;
@@ -13,6 +14,7 @@ import org.embulk.spi.FilterPlugin;
13
14
  import org.embulk.spi.PageOutput;
14
15
  import org.embulk.spi.Schema;
15
16
  import org.embulk.spi.time.TimestampParser;
17
+ import org.embulk.spi.type.Types;
16
18
  import org.slf4j.Logger;
17
19
 
18
20
  import java.util.List;
@@ -47,8 +49,13 @@ public class ExpandJsonFilterPlugin
47
49
  {
48
50
  PluginTask task = config.loadConfig(PluginTask.class);
49
51
 
50
- // check if the specified json column exists or not
51
- inputSchema.lookupColumn(task.getJsonColumnName());
52
+ // check whether the column specified by the json_column_name option exists
53
+ Column jsonColumn = inputSchema.lookupColumn(task.getJsonColumnName());
54
+ if (jsonColumn.getType() != Types.STRING && jsonColumn.getType() != Types.JSON) {
55
+ // throws ConfigException if the column is not string or json type.
56
+ throw new ConfigException(String.format("A column specified as json_column_name option must be string or json type: %s",
57
+ new Object[] {jsonColumn.toString()}));
58
+ }
52
59
 
53
60
  Schema outputSchema = buildOutputSchema(task, inputSchema);
54
61
  control.run(task.dump(), outputSchema);
@@ -18,6 +18,7 @@ import org.embulk.spi.PageBuilder;
18
18
  import org.embulk.spi.PageOutput;
19
19
  import org.embulk.spi.PageReader;
20
20
  import org.embulk.spi.Schema;
21
+ import org.embulk.spi.json.JsonParser;
21
22
  import org.embulk.spi.time.TimestampParser;
22
23
  import org.embulk.spi.type.Types;
23
24
  import org.joda.time.DateTimeZone;
@@ -111,6 +112,7 @@ public class FilteredPageOutput
111
112
  private final PageBuilder pageBuilder;
112
113
  private final ObjectMapper objectMapper = new ObjectMapper();
113
114
  private final ParseContext parseContext;
115
+ private final JsonParser jsonParser = new JsonParser();
114
116
 
115
117
  private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
116
118
  Schema outputSchema)
@@ -264,7 +266,12 @@ public class FilteredPageOutput
264
266
  if (pageReader.isNull(jsonColumn)) {
265
267
  json = null;
266
268
  }
267
- else {
269
+ else if (jsonColumn.getType() == Types.JSON) {
270
+ // TODO could use Value object directly and optimize this code
271
+ String jsonObject = pageReader.getJson(jsonColumn).toJson();
272
+ json = parseContext.parse(jsonObject);
273
+ }
274
+ else { // Types.STRING
268
275
  String jsonObject = pageReader.getString(jsonColumn);
269
276
  json = parseContext.parse(jsonObject);
270
277
  }
@@ -303,6 +310,9 @@ public class FilteredPageOutput
303
310
  throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
304
311
  }
305
312
  }
313
+ else if (Types.JSON.equals(expandedJsonColumn.getColumn().getType())) {
314
+ pageBuilder.setJson(expandedJsonColumn.getColumn(), jsonParser.parse(finalValue));
315
+ }
306
316
  }
307
317
  }
308
318
 
@@ -20,15 +20,24 @@ import org.embulk.spi.PageTestUtils;
20
20
  import org.embulk.spi.Schema;
21
21
  import org.embulk.spi.SchemaConfigException;
22
22
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
23
+ import org.embulk.spi.type.Type;
23
24
  import org.junit.Before;
24
25
  import org.junit.Rule;
25
26
  import org.junit.Test;
26
27
  import org.junit.rules.ExpectedException;
28
+ import org.msgpack.value.Value;
27
29
 
28
30
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
29
31
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
30
32
  import static org.embulk.spi.type.Types.*;
31
33
  import static org.junit.Assert.assertEquals;
34
+ import static org.msgpack.value.ValueFactory.newArray;
35
+ import static org.msgpack.value.ValueFactory.newBoolean;
36
+ import static org.msgpack.value.ValueFactory.newFloat;
37
+ import static org.msgpack.value.ValueFactory.newInteger;
38
+ import static org.msgpack.value.ValueFactory.newMap;
39
+ import static org.msgpack.value.ValueFactory.newMapBuilder;
40
+ import static org.msgpack.value.ValueFactory.newString;
32
41
 
33
42
  public class TestExpandJsonFilterPlugin
34
43
  {
@@ -39,16 +48,16 @@ public class TestExpandJsonFilterPlugin
39
48
  public ExpectedException exception = ExpectedException.none();
40
49
 
41
50
 
42
- private final Schema schema = Schema.builder()
43
- .add("_c0", STRING)
44
- .add("_c1", STRING)
45
- .build();
46
51
  private final String c1Data = "_c1_data";
52
+ // The schema object is recreated for each test method. Since each test method might require a different schema,
53
+ // it's better that this field can be overwritten by each method.
54
+ private Schema schema;
47
55
  private ExpandJsonFilterPlugin expandJsonFilterPlugin;
48
56
 
49
57
  @Before
50
58
  public void createResources()
51
59
  {
60
+ schema = schema("_c0", STRING, "_c1", STRING); // default schema
52
61
  expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
53
62
  }
54
63
 
@@ -112,6 +121,27 @@ public class TestExpandJsonFilterPlugin
112
121
  });
113
122
  }
114
123
 
124
+ @Test
125
+ public void testThrowExceptionInvalidJsonColumnType()
126
+ {
127
+ String configYaml = "" +
128
+ "type: expand_json\n" +
129
+ "json_column_name: _c2\n" +
130
+ "expanded_columns:\n" +
131
+ " - {name: _c1, type: string}";
132
+ ConfigSource config = getConfigFromYaml(configYaml);
133
+ schema = schema("_c0", STRING, "_c1", STRING, "_c2", LONG);
134
+
135
+ exception.expect(ConfigException.class);
136
+ expandJsonFilterPlugin.transaction(config, schema, new Control() {
137
+ @Override
138
+ public void run(TaskSource taskSource, Schema schema)
139
+ {
140
+ // do nothing
141
+ }
142
+ });
143
+ }
144
+
115
145
  @Test
116
146
  public void testThrowExceptionAbsentExpandedColumns()
117
147
  {
@@ -136,7 +166,8 @@ public class TestExpandJsonFilterPlugin
136
166
  " - {name: _j2, type: long}\n" +
137
167
  " - {name: _j3, type: timestamp}\n" +
138
168
  " - {name: _j4, type: double}\n" +
139
- " - {name: _j5, type: string}\n";
169
+ " - {name: _j5, type: string}\n" +
170
+ " - {name: _j6, type: json}\n";
140
171
 
141
172
  ConfigSource config = getConfigFromYaml(configYaml);
142
173
  PluginTask task = config.loadConfig(PluginTask.class);
@@ -163,6 +194,7 @@ public class TestExpandJsonFilterPlugin
163
194
  " - {name: _j3, type: timestamp}\n" +
164
195
  " - {name: _j4, type: double}\n" +
165
196
  " - {name: _j5, type: string}\n" +
197
+ " - {name: _j6, type: json}\n" +
166
198
  " - {name: _c0, type: string}\n";
167
199
 
168
200
  ConfigSource config = getConfigFromYaml(configYaml);
@@ -172,15 +204,16 @@ public class TestExpandJsonFilterPlugin
172
204
  @Override
173
205
  public void run(TaskSource taskSource, Schema outputSchema)
174
206
  {
175
- assertEquals(7, outputSchema.getColumnCount());
207
+ assertEquals(8, outputSchema.getColumnCount());
176
208
 
177
209
  Column new_j1 = outputSchema.getColumn(0);
178
210
  Column new_j2 = outputSchema.getColumn(1);
179
211
  Column new_j3 = outputSchema.getColumn(2);
180
212
  Column new_j4 = outputSchema.getColumn(3);
181
213
  Column new_j5 = outputSchema.getColumn(4);
182
- Column new_c0 = outputSchema.getColumn(5);
183
- Column old_c1 = outputSchema.getColumn(6);
214
+ Column new_j6 = outputSchema.getColumn(5);
215
+ Column new_c0 = outputSchema.getColumn(6);
216
+ Column old_c1 = outputSchema.getColumn(7);
184
217
 
185
218
  assertEquals("_j1", new_j1.getName());
186
219
  assertEquals(BOOLEAN, new_j1.getType());
@@ -192,17 +225,131 @@ public class TestExpandJsonFilterPlugin
192
225
  assertEquals(DOUBLE, new_j4.getType());
193
226
  assertEquals("_j5", new_j5.getName());
194
227
  assertEquals(STRING, new_j5.getType());
228
+ assertEquals("_j6", new_j6.getName());
229
+ assertEquals(JSON, new_j6.getType());
195
230
  assertEquals("_c0", new_c0.getName());
196
231
  assertEquals(STRING, new_c0.getType());
197
232
  assertEquals("_c1", old_c1.getName());
198
233
  assertEquals(STRING, old_c1.getType());
234
+ }
235
+ });
236
+ }
199
237
 
238
+ @Test
239
+ public void testExpandJsonValuesFromJson()
240
+ {
241
+ String configYaml = "" +
242
+ "type: expand_json\n" +
243
+ "json_column_name: _c0\n" +
244
+ "root: $.\n" +
245
+ "time_zone: Asia/Tokyo\n" +
246
+ "expanded_columns:\n" +
247
+ " - {name: _j0, type: boolean}\n" +
248
+ " - {name: _j1, type: long}\n" +
249
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
250
+ " - {name: _j3, type: double}\n" +
251
+ " - {name: _j4, type: string}\n" +
252
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
253
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
254
+ // JsonPath: https://github.com/jayway/JsonPath
255
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
256
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
257
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
258
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
259
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
260
+ " - {name: _c0, type: string}\n";
261
+
262
+ ConfigSource config = getConfigFromYaml(configYaml);
263
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
264
+
265
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
266
+ {
267
+ @Override
268
+ public void run(TaskSource taskSource, Schema outputSchema)
269
+ {
270
+ MockPageOutput mockPageOutput = new MockPageOutput();
271
+ Value data = newMapBuilder()
272
+ .put(s("_j0"), b(true))
273
+ .put(s("_j1"), i(2))
274
+ .put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
275
+ .put(s("_j3"), f(4.4))
276
+ .put(s("_j4"), s("v5"))
277
+ .put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
278
+ .put(s("_j6"), s("2014-10-21 04:44:33"))
279
+ .put(s("_j7"), newMapBuilder()
280
+ .put(s("store"), newMapBuilder()
281
+ .put(s("book"), newArray(
282
+ newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
283
+ newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
284
+ newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
285
+ newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
286
+ ))
287
+ .put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
288
+ .build())
289
+ .put(s("expensive"), i(10))
290
+ .build())
291
+ .put(s("_c0"), s("v12"))
292
+ .build();
293
+
294
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
295
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
296
+ pageOutput.add(page);
297
+ }
298
+
299
+ pageOutput.finish();
300
+ }
301
+
302
+ PageReader pageReader = new PageReader(outputSchema);
303
+
304
+ for (Page page : mockPageOutput.pages) {
305
+ pageReader.setPage(page);
306
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
307
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
308
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
309
+ assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
310
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
311
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
312
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
313
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
314
+ pageReader.getString(outputSchema.getColumn(7)));
315
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
316
+ assertEquals("" +
317
+ "[" +
318
+ "{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
319
+ "{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
320
+ "]",
321
+ pageReader.getString(outputSchema.getColumn(9)));
322
+ assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
323
+ assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
324
+ assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
325
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
326
+ }
200
327
  }
201
328
  });
202
329
  }
203
330
 
331
+ private static Value s(String value)
332
+ {
333
+ return newString(value);
334
+ }
335
+
336
+ private static Value i(int value)
337
+ {
338
+ return newInteger(value);
339
+ }
340
+
341
+ private static Value f(double value)
342
+ {
343
+ return newFloat(value);
344
+ }
345
+
346
+ private static Value b(boolean value)
347
+ {
348
+ return newBoolean(value);
349
+ }
350
+
204
351
  @Test
205
- public void testExpandJsonValues()
352
+ public void testExpandJsonValuesFromString()
206
353
  {
207
354
  String configYaml = "" +
208
355
  "type: expand_json\n" +
@@ -429,4 +576,13 @@ public class TestExpandJsonFilterPlugin
429
576
  }
430
577
  });
431
578
  }
579
+
580
+ private static Schema schema(Object... nameAndTypes)
581
+ {
582
+ Schema.Builder builder = Schema.builder();
583
+ for (int i = 0; i < nameAndTypes.length; i += 2) {
584
+ builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
585
+ }
586
+ return builder.build();
587
+ }
432
588
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-04 00:00:00.000000000 Z
11
+ date: 2016-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.0.5.jar
68
+ - classpath/embulk-filter-expand_json-0.0.6.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar