embulk-parser-jsonpath 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +7 -0
  4. data/CHANGELOG.md +5 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +107 -0
  7. data/build.gradle +111 -0
  8. data/config/checkstyle/checkstyle.xml +128 -0
  9. data/config/checkstyle/default.xml +108 -0
  10. data/example/conf.yml +18 -0
  11. data/example/dummy.rb +27 -0
  12. data/example/input.json +1006 -0
  13. data/example/input2.json +1006 -0
  14. data/example/seed.yml +8 -0
  15. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  16. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  17. data/gradlew +160 -0
  18. data/gradlew.bat +90 -0
  19. data/lib/embulk/guess/jsonpath.rb +61 -0
  20. data/lib/embulk/parser/jsonpath.rb +3 -0
  21. data/src/main/java/org/embulk/parser/jsonpath/ColumnCaster.java +97 -0
  22. data/src/main/java/org/embulk/parser/jsonpath/ColumnVisitorImpl.java +167 -0
  23. data/src/main/java/org/embulk/parser/jsonpath/JsonRecordValidateException.java +22 -0
  24. data/src/main/java/org/embulk/parser/jsonpath/JsonpathParserPlugin.java +148 -0
  25. data/src/main/java/org/embulk/parser/jsonpath/cast/BooleanCast.java +39 -0
  26. data/src/main/java/org/embulk/parser/jsonpath/cast/DoubleCast.java +41 -0
  27. data/src/main/java/org/embulk/parser/jsonpath/cast/JsonCast.java +40 -0
  28. data/src/main/java/org/embulk/parser/jsonpath/cast/LongCast.java +47 -0
  29. data/src/main/java/org/embulk/parser/jsonpath/cast/StringCast.java +82 -0
  30. data/src/test/java/org/embulk/parser/jsonpath/TestColumnCaster.java +256 -0
  31. data/src/test/java/org/embulk/parser/jsonpath/TestJsonpathParserPlugin.java +292 -0
  32. data/src/test/java/org/embulk/parser/jsonpath/cast/TestBooleanCast.java +56 -0
  33. data/src/test/java/org/embulk/parser/jsonpath/cast/TestDoubleCast.java +49 -0
  34. data/src/test/java/org/embulk/parser/jsonpath/cast/TestJsonCast.java +79 -0
  35. data/src/test/java/org/embulk/parser/jsonpath/cast/TestLongCast.java +41 -0
  36. data/src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java +103 -0
  37. metadata +113 -0
@@ -0,0 +1,292 @@
1
+ package org.embulk.parser.jsonpath;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigLoader;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.ColumnConfig;
10
+ import org.embulk.spi.DataException;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.FileInput;
13
+ import org.embulk.spi.ParserPlugin;
14
+ import org.embulk.spi.Schema;
15
+ import org.embulk.spi.SchemaConfig;
16
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
17
+ import org.embulk.spi.time.Timestamp;
18
+ import org.embulk.spi.type.Type;
19
+ import org.embulk.spi.util.InputStreamFileInput;
20
+ import org.embulk.spi.util.Pages;
21
+ import org.junit.Before;
22
+ import org.junit.Rule;
23
+ import org.junit.Test;
24
+
25
+ import java.io.ByteArrayInputStream;
26
+ import java.io.File;
27
+ import java.io.IOException;
28
+ import java.io.InputStream;
29
+ import java.nio.charset.Charset;
30
+ import java.nio.charset.StandardCharsets;
31
+ import java.util.List;
32
+
33
+ import static org.embulk.spi.type.Types.BOOLEAN;
34
+ import static org.embulk.spi.type.Types.DOUBLE;
35
+ import static org.embulk.spi.type.Types.JSON;
36
+ import static org.embulk.spi.type.Types.LONG;
37
+ import static org.embulk.spi.type.Types.STRING;
38
+ import static org.embulk.spi.type.Types.TIMESTAMP;
39
+ import static org.junit.Assert.assertEquals;
40
+ import static org.junit.Assert.assertNull;
41
+ import static org.junit.Assert.assertTrue;
42
+ import static org.junit.Assert.fail;
43
+ import static org.msgpack.value.ValueFactory.newArray;
44
+ import static org.msgpack.value.ValueFactory.newMap;
45
+ import static org.msgpack.value.ValueFactory.newString;
46
+
47
+ public class TestJsonpathParserPlugin
48
+ {
49
+ @Rule
50
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
51
+
52
+ private ConfigSource config;
53
+ private JsonpathParserPlugin plugin;
54
+ private MockPageOutput output;
55
+
56
+ @Before
57
+ public void createResource()
58
+ {
59
+ config = config().set("type", "jsonpath");
60
+ plugin = new JsonpathParserPlugin();
61
+ recreatePageOutput();
62
+ }
63
+
64
+ private void recreatePageOutput()
65
+ {
66
+ output = new MockPageOutput();
67
+ }
68
+
69
+ private ConfigSource config()
70
+ {
71
+ return runtime.getExec().newConfigSource();
72
+ }
73
+
74
+ private File getResourceFile(String resourceName)
75
+ throws IOException
76
+ {
77
+ return new File(this.getClass().getResource(resourceName).getFile());
78
+ }
79
+
80
+ private ConfigSource getConfigFromYamlFile(File yamlFile)
81
+ throws IOException
82
+ {
83
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
84
+ return loader.fromYamlFile(yamlFile);
85
+ }
86
+
87
+ private void transaction(ConfigSource config, final FileInput input)
88
+ {
89
+ plugin.transaction(config, new ParserPlugin.Control()
90
+ {
91
+ @Override
92
+ public void run(TaskSource taskSource, Schema schema)
93
+ {
94
+ plugin.run(taskSource, schema, input, output);
95
+ }
96
+ });
97
+ }
98
+
99
+ @Test
100
+ public void skipRecords()
101
+ throws Exception
102
+ {
103
+ SchemaConfig schema = schema(
104
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
105
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
106
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
107
+
108
+ transaction(config, fileInput(
109
+ "[",
110
+ "[]",
111
+ "\"embulk\"",
112
+ "10",
113
+ "true",
114
+ "false",
115
+ "null",
116
+ " ",
117
+ "]"
118
+ ));
119
+
120
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
121
+ assertEquals(0, records.size());
122
+ }
123
+
124
+ @Test
125
+ public void throwDataException()
126
+ throws Exception
127
+ {
128
+ SchemaConfig schema = schema(
129
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
130
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
131
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
132
+
133
+ try {
134
+ transaction(config, fileInput(
135
+ "\"not_map_value\""
136
+ ));
137
+ fail();
138
+ }
139
+ catch (Throwable t) {
140
+ assertTrue(t instanceof DataException);
141
+ }
142
+ }
143
+
144
+ @Test
145
+ public void writeNils()
146
+ throws Exception
147
+ {
148
+ SchemaConfig schema = schema(
149
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
150
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
151
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
152
+
153
+ transaction(config, fileInput(
154
+ "[",
155
+ "{}",
156
+ "{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
157
+ "{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
158
+ "{}",
159
+ "]"
160
+ ));
161
+
162
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
163
+ assertEquals(4, records.size());
164
+
165
+ for (Object[] record : records) {
166
+ for (int i = 0; i < 6; i++) {
167
+ assertNull(record[i]);
168
+ }
169
+ }
170
+ }
171
+
172
+ @Test
173
+ public void useNormal()
174
+ throws Exception
175
+ {
176
+ SchemaConfig schema = schema(
177
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
178
+ column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
179
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
180
+
181
+ transaction(config, fileInput(
182
+ "[",
183
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
184
+ "[1, 2, 3]",
185
+ "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
186
+ "]"
187
+ ));
188
+
189
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
190
+ assertEquals(2, records.size());
191
+
192
+ Object[] record;
193
+ {
194
+ record = records.get(0);
195
+ assertEquals(true, record[0]);
196
+ assertEquals(10L, record[1]);
197
+ assertEquals(0.1, (Double) record[2], 0.0001);
198
+ assertEquals("embulk", record[3]);
199
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
200
+ assertEquals(newMap(newString("k"), newString("v")), record[5]);
201
+ }
202
+ {
203
+ record = records.get(1);
204
+ assertEquals(false, record[0]);
205
+ assertEquals(-10L, record[1]);
206
+ assertEquals(1.0, (Double) record[2], 0.0001);
207
+ assertEquals("エンバルク", record[3]);
208
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
209
+ assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
210
+ }
211
+
212
+ recreatePageOutput();
213
+ }
214
+
215
+ @Test
216
+ public void useNormalWithRootPath()
217
+ throws Exception
218
+ {
219
+ SchemaConfig schema = schema(
220
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
221
+ column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
222
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("root", "$.records");
223
+
224
+ transaction(config, fileInput(
225
+ "{\"records\":[",
226
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
227
+ "[1, 2, 3]",
228
+ "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
229
+ "]}"
230
+ ));
231
+
232
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
233
+ assertEquals(2, records.size());
234
+
235
+ Object[] record;
236
+ {
237
+ record = records.get(0);
238
+ assertEquals(true, record[0]);
239
+ assertEquals(10L, record[1]);
240
+ assertEquals(0.1, (Double) record[2], 0.0001);
241
+ assertEquals("embulk", record[3]);
242
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
243
+ assertEquals(newMap(newString("k"), newString("v")), record[5]);
244
+ }
245
+ {
246
+ record = records.get(1);
247
+ assertEquals(false, record[0]);
248
+ assertEquals(-10L, record[1]);
249
+ assertEquals(1.0, (Double) record[2], 0.0001);
250
+ assertEquals("エンバルク", record[3]);
251
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
252
+ assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
253
+ }
254
+
255
+ recreatePageOutput();
256
+ }
257
+
258
+ private FileInput fileInput(String... lines)
259
+ throws Exception
260
+ {
261
+ StringBuilder sb = new StringBuilder();
262
+ for (String line : lines) {
263
+ sb.append(line).append("\n");
264
+ }
265
+
266
+ ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8));
267
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
268
+ }
269
+
270
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
271
+ throws IOException
272
+ {
273
+ return new InputStreamFileInput.IteratorProvider(
274
+ ImmutableList.copyOf(inputStreams));
275
+ }
276
+
277
+ private SchemaConfig schema(ColumnConfig... columns)
278
+ {
279
+ return new SchemaConfig(Lists.newArrayList(columns));
280
+ }
281
+
282
+ private ColumnConfig column(String name, Type type)
283
+ {
284
+ return column(name, type, config());
285
+ }
286
+
287
+ private ColumnConfig column(String name, Type type, ConfigSource option)
288
+ {
289
+ return new ColumnConfig(name, type, option);
290
+ }
291
+
292
+ }
@@ -0,0 +1,56 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.junit.Test;
5
+
6
+ import static org.junit.Assert.assertEquals;
7
+ import static org.junit.Assert.assertTrue;
8
+ import static org.junit.Assert.fail;
9
+
10
+ public class TestBooleanCast
11
+ {
12
+ @Test
13
+ public void asBoolean()
14
+ {
15
+ assertEquals(true, BooleanCast.asBoolean(true));
16
+ assertEquals(false, BooleanCast.asBoolean(false));
17
+ }
18
+
19
+ @Test
20
+ public void asLong()
21
+ {
22
+ assertEquals(1, BooleanCast.asLong(true));
23
+ assertEquals(0, BooleanCast.asLong(false));
24
+ }
25
+
26
+ @Test
27
+ public void asDouble()
28
+ {
29
+ try {
30
+ BooleanCast.asDouble(true);
31
+ fail();
32
+ }
33
+ catch (Throwable t) {
34
+ assertTrue(t instanceof DataException);
35
+ }
36
+ }
37
+
38
+ @Test
39
+ public void asString()
40
+ {
41
+ assertEquals("true", BooleanCast.asString(true));
42
+ assertEquals("false", BooleanCast.asString(false));
43
+ }
44
+
45
+ @Test
46
+ public void asTimestamp()
47
+ {
48
+ try {
49
+ BooleanCast.asTimestamp(true);
50
+ fail();
51
+ }
52
+ catch (Throwable t) {
53
+ assertTrue(t instanceof DataException);
54
+ }
55
+ }
56
+ }
@@ -0,0 +1,49 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.embulk.spi.time.Timestamp;
5
+ import org.junit.Test;
6
+
7
+ import static org.junit.Assert.assertEquals;
8
+ import static org.junit.Assert.assertTrue;
9
+ import static org.junit.Assert.fail;
10
+
11
+ public class TestDoubleCast
12
+ {
13
+ @Test
14
+ public void asBoolean()
15
+ {
16
+ try {
17
+ DoubleCast.asBoolean(0.5);
18
+ fail();
19
+ }
20
+ catch (Throwable t) {
21
+ assertTrue(t instanceof DataException);
22
+ }
23
+ }
24
+
25
+ @Test
26
+ public void asLong()
27
+ {
28
+ assertEquals(0, DoubleCast.asLong(0.5));
29
+ }
30
+
31
+ @Test
32
+ public void asDouble()
33
+ {
34
+ assertEquals(0.5, DoubleCast.asDouble(0.5), 0.0);
35
+ }
36
+
37
+ @Test
38
+ public void asString()
39
+ {
40
+ assertEquals("0.5", DoubleCast.asString(0.5));
41
+ }
42
+
43
+ @Test
44
+ public void asTimestamp()
45
+ {
46
+ Timestamp expected = Timestamp.ofEpochSecond(1, 500000000);
47
+ assertEquals(expected, DoubleCast.asTimestamp(1.5));
48
+ }
49
+ }
@@ -0,0 +1,79 @@
1
+ package org.embulk.parser.jsonpath.cast;
2
+
3
+ import org.embulk.spi.DataException;
4
+ import org.junit.Before;
5
+ import org.junit.Test;
6
+ import org.msgpack.value.Value;
7
+ import org.msgpack.value.ValueFactory;
8
+
9
+ import static org.junit.Assert.assertEquals;
10
+ import static org.junit.Assert.assertTrue;
11
+ import static org.junit.Assert.fail;
12
+
13
+ public class TestJsonCast
14
+ {
15
+ public Value value;
16
+
17
+ @Before
18
+ public void createResource()
19
+ {
20
+ Value[] kvs = new Value[2];
21
+ kvs[0] = ValueFactory.newString("k");
22
+ kvs[1] = ValueFactory.newString("v");
23
+ value = ValueFactory.newMap(kvs);
24
+ }
25
+
26
+ @Test
27
+ public void asBoolean()
28
+ {
29
+ try {
30
+ JsonCast.asBoolean(value);
31
+ fail();
32
+ }
33
+ catch (Throwable t) {
34
+ assertTrue(t instanceof DataException);
35
+ }
36
+ }
37
+
38
+ @Test
39
+ public void asLong()
40
+ {
41
+ try {
42
+ JsonCast.asLong(value);
43
+ fail();
44
+ }
45
+ catch (Throwable t) {
46
+ assertTrue(t instanceof DataException);
47
+ }
48
+ }
49
+
50
+ @Test
51
+ public void asDouble()
52
+ {
53
+ try {
54
+ JsonCast.asDouble(value);
55
+ fail();
56
+ }
57
+ catch (Throwable t) {
58
+ assertTrue(t instanceof DataException);
59
+ }
60
+ }
61
+
62
+ @Test
63
+ public void asString()
64
+ {
65
+ assertEquals("{\"k\":\"v\"}", JsonCast.asString(value));
66
+ }
67
+
68
+ @Test
69
+ public void asTimestamp()
70
+ {
71
+ try {
72
+ JsonCast.asTimestamp(value);
73
+ fail();
74
+ }
75
+ catch (Throwable t) {
76
+ assertTrue(t instanceof DataException);
77
+ }
78
+ }
79
+ }