embulk-parser-jsonpath 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +7 -0
  4. data/CHANGELOG.md +5 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +107 -0
  7. data/build.gradle +111 -0
  8. data/config/checkstyle/checkstyle.xml +128 -0
  9. data/config/checkstyle/default.xml +108 -0
  10. data/example/conf.yml +18 -0
  11. data/example/dummy.rb +27 -0
  12. data/example/input.json +1006 -0
  13. data/example/input2.json +1006 -0
  14. data/example/seed.yml +8 -0
  15. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  16. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  17. data/gradlew +160 -0
  18. data/gradlew.bat +90 -0
  19. data/lib/embulk/guess/jsonpath.rb +61 -0
  20. data/lib/embulk/parser/jsonpath.rb +3 -0
  21. data/src/main/java/org/embulk/parser/jsonpath/ColumnCaster.java +97 -0
  22. data/src/main/java/org/embulk/parser/jsonpath/ColumnVisitorImpl.java +167 -0
  23. data/src/main/java/org/embulk/parser/jsonpath/JsonRecordValidateException.java +22 -0
  24. data/src/main/java/org/embulk/parser/jsonpath/JsonpathParserPlugin.java +148 -0
  25. data/src/main/java/org/embulk/parser/jsonpath/cast/BooleanCast.java +39 -0
  26. data/src/main/java/org/embulk/parser/jsonpath/cast/DoubleCast.java +41 -0
  27. data/src/main/java/org/embulk/parser/jsonpath/cast/JsonCast.java +40 -0
  28. data/src/main/java/org/embulk/parser/jsonpath/cast/LongCast.java +47 -0
  29. data/src/main/java/org/embulk/parser/jsonpath/cast/StringCast.java +82 -0
  30. data/src/test/java/org/embulk/parser/jsonpath/TestColumnCaster.java +256 -0
  31. data/src/test/java/org/embulk/parser/jsonpath/TestJsonpathParserPlugin.java +292 -0
  32. data/src/test/java/org/embulk/parser/jsonpath/cast/TestBooleanCast.java +56 -0
  33. data/src/test/java/org/embulk/parser/jsonpath/cast/TestDoubleCast.java +49 -0
  34. data/src/test/java/org/embulk/parser/jsonpath/cast/TestJsonCast.java +79 -0
  35. data/src/test/java/org/embulk/parser/jsonpath/cast/TestLongCast.java +41 -0
  36. data/src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java +103 -0
  37. metadata +113 -0
@@ -0,0 +1,292 @@
1
+ package org.embulk.parser.jsonpath;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigLoader;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.ColumnConfig;
10
+ import org.embulk.spi.DataException;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.FileInput;
13
+ import org.embulk.spi.ParserPlugin;
14
+ import org.embulk.spi.Schema;
15
+ import org.embulk.spi.SchemaConfig;
16
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
17
+ import org.embulk.spi.time.Timestamp;
18
+ import org.embulk.spi.type.Type;
19
+ import org.embulk.spi.util.InputStreamFileInput;
20
+ import org.embulk.spi.util.Pages;
21
+ import org.junit.Before;
22
+ import org.junit.Rule;
23
+ import org.junit.Test;
24
+
25
+ import java.io.ByteArrayInputStream;
26
+ import java.io.File;
27
+ import java.io.IOException;
28
+ import java.io.InputStream;
29
+ import java.nio.charset.Charset;
30
+ import java.nio.charset.StandardCharsets;
31
+ import java.util.List;
32
+
33
+ import static org.embulk.spi.type.Types.BOOLEAN;
34
+ import static org.embulk.spi.type.Types.DOUBLE;
35
+ import static org.embulk.spi.type.Types.JSON;
36
+ import static org.embulk.spi.type.Types.LONG;
37
+ import static org.embulk.spi.type.Types.STRING;
38
+ import static org.embulk.spi.type.Types.TIMESTAMP;
39
+ import static org.junit.Assert.assertEquals;
40
+ import static org.junit.Assert.assertNull;
41
+ import static org.junit.Assert.assertTrue;
42
+ import static org.junit.Assert.fail;
43
+ import static org.msgpack.value.ValueFactory.newArray;
44
+ import static org.msgpack.value.ValueFactory.newMap;
45
+ import static org.msgpack.value.ValueFactory.newString;
46
+
47
+ public class TestJsonpathParserPlugin
48
+ {
49
+ @Rule
50
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
51
+
52
+ private ConfigSource config;
53
+ private JsonpathParserPlugin plugin;
54
+ private MockPageOutput output;
55
+
56
+ @Before
57
+ public void createResource()
58
+ {
59
+ config = config().set("type", "jsonpath");
60
+ plugin = new JsonpathParserPlugin();
61
+ recreatePageOutput();
62
+ }
63
+
64
+ private void recreatePageOutput()
65
+ {
66
+ output = new MockPageOutput();
67
+ }
68
+
69
+ private ConfigSource config()
70
+ {
71
+ return runtime.getExec().newConfigSource();
72
+ }
73
+
74
+ private File getResourceFile(String resourceName)
75
+ throws IOException
76
+ {
77
+ return new File(this.getClass().getResource(resourceName).getFile());
78
+ }
79
+
80
+ private ConfigSource getConfigFromYamlFile(File yamlFile)
81
+ throws IOException
82
+ {
83
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
84
+ return loader.fromYamlFile(yamlFile);
85
+ }
86
+
87
+ private void transaction(ConfigSource config, final FileInput input)
88
+ {
89
+ plugin.transaction(config, new ParserPlugin.Control()
90
+ {
91
+ @Override
92
+ public void run(TaskSource taskSource, Schema schema)
93
+ {
94
+ plugin.run(taskSource, schema, input, output);
95
+ }
96
+ });
97
+ }
98
+
99
+ @Test
100
+ public void skipRecords()
101
+ throws Exception
102
+ {
103
+ SchemaConfig schema = schema(
104
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
105
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
106
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
107
+
108
+ transaction(config, fileInput(
109
+ "[",
110
+ "[]",
111
+ "\"embulk\"",
112
+ "10",
113
+ "true",
114
+ "false",
115
+ "null",
116
+ " ",
117
+ "]"
118
+ ));
119
+
120
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
121
+ assertEquals(0, records.size());
122
+ }
123
+
124
+ @Test
125
+ public void throwDataException()
126
+ throws Exception
127
+ {
128
+ SchemaConfig schema = schema(
129
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
130
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
131
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
132
+
133
+ try {
134
+ transaction(config, fileInput(
135
+ "\"not_map_value\""
136
+ ));
137
+ fail();
138
+ }
139
+ catch (Throwable t) {
140
+ assertTrue(t instanceof DataException);
141
+ }
142
+ }
143
+
144
+ @Test
145
+ public void writeNils()
146
+ throws Exception
147
+ {
148
+ SchemaConfig schema = schema(
149
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
150
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
151
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
152
+
153
+ transaction(config, fileInput(
154
+ "[",
155
+ "{}",
156
+ "{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
157
+ "{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
158
+ "{}",
159
+ "]"
160
+ ));
161
+
162
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
163
+ assertEquals(4, records.size());
164
+
165
+ for (Object[] record : records) {
166
+ for (int i = 0; i < 6; i++) {
167
+ assertNull(record[i]);
168
+ }
169
+ }
170
+ }
171
+
172
+ @Test
173
+ public void useNormal()
174
+ throws Exception
175
+ {
176
+ SchemaConfig schema = schema(
177
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
178
+ column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
179
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
180
+
181
+ transaction(config, fileInput(
182
+ "[",
183
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
184
+ "[1, 2, 3]",
185
+ "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
186
+ "]"
187
+ ));
188
+
189
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
190
+ assertEquals(2, records.size());
191
+
192
+ Object[] record;
193
+ {
194
+ record = records.get(0);
195
+ assertEquals(true, record[0]);
196
+ assertEquals(10L, record[1]);
197
+ assertEquals(0.1, (Double) record[2], 0.0001);
198
+ assertEquals("embulk", record[3]);
199
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
200
+ assertEquals(newMap(newString("k"), newString("v")), record[5]);
201
+ }
202
+ {
203
+ record = records.get(1);
204
+ assertEquals(false, record[0]);
205
+ assertEquals(-10L, record[1]);
206
+ assertEquals(1.0, (Double) record[2], 0.0001);
207
+ assertEquals("エンバルク", record[3]);
208
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
209
+ assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
210
+ }
211
+
212
+ recreatePageOutput();
213
+ }
214
+
215
+ @Test
216
+ public void useNormalWithRootPath()
217
+ throws Exception
218
+ {
219
+ SchemaConfig schema = schema(
220
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
221
+ column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
222
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("root", "$.records");
223
+
224
+ transaction(config, fileInput(
225
+ "{\"records\":[",
226
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
227
+ "[1, 2, 3]",
228
+ "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
229
+ "]}"
230
+ ));
231
+
232
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
233
+ assertEquals(2, records.size());
234
+
235
+ Object[] record;
236
+ {
237
+ record = records.get(0);
238
+ assertEquals(true, record[0]);
239
+ assertEquals(10L, record[1]);
240
+ assertEquals(0.1, (Double) record[2], 0.0001);
241
+ assertEquals("embulk", record[3]);
242
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
243
+ assertEquals(newMap(newString("k"), newString("v")), record[5]);
244
+ }
245
+ {
246
+ record = records.get(1);
247
+ assertEquals(false, record[0]);
248
+ assertEquals(-10L, record[1]);
249
+ assertEquals(1.0, (Double) record[2], 0.0001);
250
+ assertEquals("エンバルク", record[3]);
251
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
252
+ assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
253
+ }
254
+
255
+ recreatePageOutput();
256
+ }
257
+
258
+ private FileInput fileInput(String... lines)
259
+ throws Exception
260
+ {
261
+ StringBuilder sb = new StringBuilder();
262
+ for (String line : lines) {
263
+ sb.append(line).append("\n");
264
+ }
265
+
266
+ ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8));
267
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
268
+ }
269
+
270
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
271
+ throws IOException
272
+ {
273
+ return new InputStreamFileInput.IteratorProvider(
274
+ ImmutableList.copyOf(inputStreams));
275
+ }
276
+
277
+ private SchemaConfig schema(ColumnConfig... columns)
278
+ {
279
+ return new SchemaConfig(Lists.newArrayList(columns));
280
+ }
281
+
282
+ private ColumnConfig column(String name, Type type)
283
+ {
284
+ return column(name, type, config());
285
+ }
286
+
287
+ private ColumnConfig column(String name, Type type, ConfigSource option)
288
+ {
289
+ return new ColumnConfig(name, type, option);
290
+ }
291
+
292
+ }
@@ -0,0 +1,56 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.junit.Test;
5
+
6
+ import static org.junit.Assert.assertEquals;
7
+ import static org.junit.Assert.assertTrue;
8
+ import static org.junit.Assert.fail;
9
+
10
+ public class TestBooleanCast
11
+ {
12
+ @Test
13
+ public void asBoolean()
14
+ {
15
+ assertEquals(true, BooleanCast.asBoolean(true));
16
+ assertEquals(false, BooleanCast.asBoolean(false));
17
+ }
18
+
19
+ @Test
20
+ public void asLong()
21
+ {
22
+ assertEquals(1, BooleanCast.asLong(true));
23
+ assertEquals(0, BooleanCast.asLong(false));
24
+ }
25
+
26
+ @Test
27
+ public void asDouble()
28
+ {
29
+ try {
30
+ BooleanCast.asDouble(true);
31
+ fail();
32
+ }
33
+ catch (Throwable t) {
34
+ assertTrue(t instanceof DataException);
35
+ }
36
+ }
37
+
38
+ @Test
39
+ public void asString()
40
+ {
41
+ assertEquals("true", BooleanCast.asString(true));
42
+ assertEquals("false", BooleanCast.asString(false));
43
+ }
44
+
45
+ @Test
46
+ public void asTimestamp()
47
+ {
48
+ try {
49
+ BooleanCast.asTimestamp(true);
50
+ fail();
51
+ }
52
+ catch (Throwable t) {
53
+ assertTrue(t instanceof DataException);
54
+ }
55
+ }
56
+ }
@@ -0,0 +1,49 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.embulk.spi.time.Timestamp;
5
+ import org.junit.Test;
6
+
7
+ import static org.junit.Assert.assertEquals;
8
+ import static org.junit.Assert.assertTrue;
9
+ import static org.junit.Assert.fail;
10
+
11
+ public class TestDoubleCast
12
+ {
13
+ @Test
14
+ public void asBoolean()
15
+ {
16
+ try {
17
+ DoubleCast.asBoolean(0.5);
18
+ fail();
19
+ }
20
+ catch (Throwable t) {
21
+ assertTrue(t instanceof DataException);
22
+ }
23
+ }
24
+
25
+ @Test
26
+ public void asLong()
27
+ {
28
+ assertEquals(0, DoubleCast.asLong(0.5));
29
+ }
30
+
31
+ @Test
32
+ public void asDouble()
33
+ {
34
+ assertEquals(0.5, DoubleCast.asDouble(0.5), 0.0);
35
+ }
36
+
37
+ @Test
38
+ public void asString()
39
+ {
40
+ assertEquals("0.5", DoubleCast.asString(0.5));
41
+ }
42
+
43
+ @Test
44
+ public void asTimestamp()
45
+ {
46
+ Timestamp expected = Timestamp.ofEpochSecond(1, 500000000);
47
+ assertEquals(expected, DoubleCast.asTimestamp(1.5));
48
+ }
49
+ }
@@ -0,0 +1,79 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.junit.Before;
5
+ import org.junit.Test;
6
+ import org.msgpack.value.Value;
7
+ import org.msgpack.value.ValueFactory;
8
+
9
+ import static org.junit.Assert.assertEquals;
10
+ import static org.junit.Assert.assertTrue;
11
+ import static org.junit.Assert.fail;
12
+
13
+ public class TestJsonCast
14
+ {
15
+ public Value value;
16
+
17
+ @Before
18
+ public void createResource()
19
+ {
20
+ Value[] kvs = new Value[2];
21
+ kvs[0] = ValueFactory.newString("k");
22
+ kvs[1] = ValueFactory.newString("v");
23
+ value = ValueFactory.newMap(kvs);
24
+ }
25
+
26
+ @Test
27
+ public void asBoolean()
28
+ {
29
+ try {
30
+ JsonCast.asBoolean(value);
31
+ fail();
32
+ }
33
+ catch (Throwable t) {
34
+ assertTrue(t instanceof DataException);
35
+ }
36
+ }
37
+
38
+ @Test
39
+ public void asLong()
40
+ {
41
+ try {
42
+ JsonCast.asLong(value);
43
+ fail();
44
+ }
45
+ catch (Throwable t) {
46
+ assertTrue(t instanceof DataException);
47
+ }
48
+ }
49
+
50
+ @Test
51
+ public void asDouble()
52
+ {
53
+ try {
54
+ JsonCast.asDouble(value);
55
+ fail();
56
+ }
57
+ catch (Throwable t) {
58
+ assertTrue(t instanceof DataException);
59
+ }
60
+ }
61
+
62
+ @Test
63
+ public void asString()
64
+ {
65
+ assertEquals("{\"k\":\"v\"}", JsonCast.asString(value));
66
+ }
67
+
68
+ @Test
69
+ public void asTimestamp()
70
+ {
71
+ try {
72
+ JsonCast.asTimestamp(value);
73
+ fail();
74
+ }
75
+ catch (Throwable t) {
76
+ assertTrue(t instanceof DataException);
77
+ }
78
+ }
79
+ }