embulk-filter-column 0.6.0.pre5 → 0.6.0.pre6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +2 -10
- data/src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java +3 -2
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +3 -3
- data/src/main/java/org/embulk/filter/column/JsonPathUtil.java +2 -0
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +16 -9
- data/src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java +16 -16
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +3 -3
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +11 -23
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28dd927b9569d7dc3b9f9fef47bf6c4d165f0910
|
4
|
+
data.tar.gz: 83e6e55aba543066e42ae4170d75dd081042cec6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22ab4bf44258f1719eb99114a2fa9e9e45c34ef11795769ecfc356bdd46dc8735632045470567823e3b1e5ec8d1ec1ce1149d08472246d1dbcfd3287ab3abfa8
|
7
|
+
data.tar.gz: 792770891fa30db0126b01aff7081076bff3bc129d223529180ad33fe90b0f9c34e0d3e142cf1898cccad96c2bfd44f56a6ff3e66c63f7c2bb8a5ab62b101093
|
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -2,14 +2,13 @@ package org.embulk.filter.column;
|
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
4
|
import com.google.common.collect.ImmutableList;
|
5
|
-
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
6
6
|
import org.embulk.config.Config;
|
7
7
|
import org.embulk.config.ConfigDefault;
|
8
8
|
import org.embulk.config.ConfigException;
|
9
9
|
import org.embulk.config.ConfigSource;
|
10
10
|
import org.embulk.config.Task;
|
11
11
|
import org.embulk.config.TaskSource;
|
12
|
-
|
13
12
|
import org.embulk.spi.Column;
|
14
13
|
import org.embulk.spi.Exec;
|
15
14
|
import org.embulk.spi.FilterPlugin;
|
@@ -21,14 +20,11 @@ import org.embulk.spi.Schema;
|
|
21
20
|
import org.embulk.spi.SchemaConfigException;
|
22
21
|
import org.embulk.spi.time.TimestampParser;
|
23
22
|
import org.embulk.spi.type.Type;
|
24
|
-
|
25
23
|
import org.joda.time.DateTimeZone;
|
26
24
|
import org.slf4j.Logger;
|
27
25
|
|
28
26
|
import java.util.List;
|
29
27
|
|
30
|
-
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
31
|
-
|
32
28
|
public class ColumnFilterPlugin implements FilterPlugin
|
33
29
|
{
|
34
30
|
private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
@@ -95,11 +91,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
95
91
|
|
96
92
|
private void configure(PluginTask task)
|
97
93
|
{
|
98
|
-
|
99
|
-
List<ColumnConfig> addColumns = task.getAddColumns();
|
100
|
-
List<ColumnConfig> dropColumns = task.getDropColumns();
|
101
|
-
|
102
|
-
if (columns.size() > 0 && dropColumns.size() > 0) {
|
94
|
+
if (task.getColumns().size() > 0 && task.getDropColumns().size() > 0) {
|
103
95
|
throw new ConfigException("Either of \"columns\", \"drop_columns\" can be specified.");
|
104
96
|
}
|
105
97
|
}
|
@@ -1,8 +1,9 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
6
|
+
|
6
7
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
7
8
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
8
9
|
|
@@ -126,7 +127,7 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
126
127
|
}
|
127
128
|
else if (type instanceof LongType) {
|
128
129
|
if (columnConfig.getDefault().isPresent()) {
|
129
|
-
return
|
130
|
+
return Long.valueOf(columnConfig.getDefault().get().toString());
|
130
131
|
}
|
131
132
|
}
|
132
133
|
else if (type instanceof DoubleType) {
|
@@ -5,8 +5,8 @@ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
|
5
5
|
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
6
|
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
7
|
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
8
|
-
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
9
8
|
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
10
10
|
import io.github.medjed.jsonpathcompiler.expressions.path.WildcardPathToken;
|
11
11
|
import org.embulk.config.ConfigException;
|
12
12
|
import org.embulk.spi.type.Type;
|
@@ -33,7 +33,7 @@ public class JsonColumn
|
|
33
33
|
private StringValue srcParentPathValue = null;
|
34
34
|
private Value srcTailNameValue = null;
|
35
35
|
|
36
|
-
public static int WILDCARD_INDEX = -1;
|
36
|
+
public static final int WILDCARD_INDEX = -1;
|
37
37
|
|
38
38
|
public JsonColumn(String path, Type type)
|
39
39
|
{
|
@@ -101,7 +101,7 @@ public class JsonColumn
|
|
101
101
|
return arrayIndexOperation.indexes().get(0).longValue();
|
102
102
|
}
|
103
103
|
else if (tail instanceof WildcardPathToken) {
|
104
|
-
return
|
104
|
+
return Long.valueOf(WILDCARD_INDEX);
|
105
105
|
}
|
106
106
|
else {
|
107
107
|
return null;
|
@@ -337,19 +337,26 @@ public class JsonVisitor
|
|
337
337
|
}
|
338
338
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
339
339
|
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
340
|
-
int
|
341
|
-
if (
|
342
|
-
for (
|
343
|
-
|
344
|
-
if (
|
345
|
-
v =
|
340
|
+
int i = jsonColumn.getTailIndex().intValue();
|
341
|
+
if (i == JsonColumn.WILDCARD_INDEX) {
|
342
|
+
for (i = 0; i < size; i++) {
|
343
|
+
int src = jsonColumn.getSrcTailIndex().intValue();
|
344
|
+
if (src == JsonColumn.WILDCARD_INDEX) {
|
345
|
+
Value v = arrayValue.get(i);
|
346
|
+
if (v == null) {
|
347
|
+
v = jsonColumn.getDefaultValue();
|
348
|
+
}
|
349
|
+
String newPath = jsonColumn.getPath(); // == newArrayJsonPath(rootPath, i); // [*]
|
350
|
+
Value visited = visit(newPath, v);
|
351
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
352
|
+
}
|
353
|
+
else {
|
354
|
+
assert (false); // not supported yet
|
346
355
|
}
|
347
|
-
String newPath = jsonColumn.getPath();
|
348
|
-
Value visited = visit(newPath, v);
|
349
|
-
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
350
356
|
}
|
351
357
|
}
|
352
358
|
else {
|
359
|
+
int src = jsonColumn.getSrcTailIndex().intValue();
|
353
360
|
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
354
361
|
if (v == null) {
|
355
362
|
v = jsonColumn.getDefaultValue();
|
@@ -100,8 +100,8 @@ public class TestColumnVisitorImpl
|
|
100
100
|
.add("remove_me", STRING)
|
101
101
|
.build();
|
102
102
|
List<Object[]> records = filter(task, inputSchema,
|
103
|
-
Timestamp.ofEpochSecond(0), "string",
|
104
|
-
Timestamp.ofEpochSecond(0), "string",
|
103
|
+
Timestamp.ofEpochSecond(0), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
|
104
|
+
Timestamp.ofEpochSecond(0), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me");
|
105
105
|
|
106
106
|
assertEquals(2, records.size());
|
107
107
|
|
@@ -111,9 +111,9 @@ public class TestColumnVisitorImpl
|
|
111
111
|
assertEquals(6, record.length);
|
112
112
|
assertEquals(Timestamp.ofEpochSecond(0), record[0]);
|
113
113
|
assertEquals("string", record[1]);
|
114
|
-
assertEquals(
|
115
|
-
assertEquals(
|
116
|
-
assertEquals(
|
114
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
115
|
+
assertEquals(Long.valueOf(0), record[3]);
|
116
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
117
117
|
assertEquals(ValueFactory.newString("json"), record[5]);
|
118
118
|
}
|
119
119
|
}
|
@@ -140,7 +140,7 @@ public class TestColumnVisitorImpl
|
|
140
140
|
.add("remove_me", STRING)
|
141
141
|
.build();
|
142
142
|
List<Object[]> records = filter(task, inputSchema,
|
143
|
-
Timestamp.ofEpochSecond(1436745600), "string",
|
143
|
+
Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
|
144
144
|
null, null, null, null, null, null, "remove_me");
|
145
145
|
|
146
146
|
assertEquals(2, records.size());
|
@@ -151,9 +151,9 @@ public class TestColumnVisitorImpl
|
|
151
151
|
assertEquals(6, record.length);
|
152
152
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[0]);
|
153
153
|
assertEquals("string", record[1]);
|
154
|
-
assertEquals(
|
155
|
-
assertEquals(
|
156
|
-
assertEquals(
|
154
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
155
|
+
assertEquals(Long.valueOf(0), record[3]);
|
156
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
157
157
|
assertEquals(ValueFactory.newString("json"), record[5]);
|
158
158
|
}
|
159
159
|
{
|
@@ -161,9 +161,9 @@ public class TestColumnVisitorImpl
|
|
161
161
|
assertEquals(6, record.length);
|
162
162
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[0]);
|
163
163
|
assertEquals("string", record[1]);
|
164
|
-
assertEquals(
|
165
|
-
assertEquals(
|
166
|
-
assertEquals(
|
164
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
165
|
+
assertEquals(Long.valueOf(0), record[3]);
|
166
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
167
167
|
assertEquals("{\"foo\":\"bar\"}", record[5].toString());
|
168
168
|
}
|
169
169
|
}
|
@@ -213,7 +213,7 @@ public class TestColumnVisitorImpl
|
|
213
213
|
.add("keep_me", STRING)
|
214
214
|
.build();
|
215
215
|
List<Object[]> records = filter(task, inputSchema,
|
216
|
-
Timestamp.ofEpochSecond(1436745600), "string",
|
216
|
+
Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "keep_me",
|
217
217
|
null, null, null, null, null, null, "keep_me");
|
218
218
|
|
219
219
|
assertEquals(2, records.size());
|
@@ -259,9 +259,9 @@ public class TestColumnVisitorImpl
|
|
259
259
|
assertEquals("keep_me", record[0]);
|
260
260
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[1]);
|
261
261
|
assertEquals("string", record[2]);
|
262
|
-
assertEquals(
|
263
|
-
assertEquals(
|
264
|
-
assertEquals(
|
262
|
+
assertEquals(Boolean.valueOf(true), record[3]);
|
263
|
+
assertEquals(Long.valueOf(0), record[4]);
|
264
|
+
assertEquals(Double.valueOf(0.5), record[5]);
|
265
265
|
assertEquals("{\"foo\":\"bar\"}", record[6].toString());
|
266
266
|
}
|
267
267
|
}
|
@@ -72,8 +72,8 @@ public class TestJsonColumn
|
|
72
72
|
assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
|
73
73
|
assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
|
74
74
|
assertEquals(null, JsonColumn.getTailIndex("$.foo"));
|
75
|
-
assertEquals(
|
76
|
-
assertEquals(
|
77
|
-
assertEquals(
|
75
|
+
assertEquals(Long.valueOf(1), JsonColumn.getTailIndex("$.foo[0][1]"));
|
76
|
+
assertEquals(Long.valueOf(0), JsonColumn.getTailIndex("$.foo[0]"));
|
77
|
+
assertEquals(Long.valueOf(0), JsonColumn.getTailIndex("$[0]"));
|
78
78
|
}
|
79
79
|
}
|
@@ -1,7 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
3
|
import org.embulk.config.ConfigException;
|
4
|
-
import org.embulk.EmbulkTestRuntime;
|
5
4
|
import org.embulk.config.ConfigLoader;
|
6
5
|
import org.embulk.config.ConfigSource;
|
7
6
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
@@ -21,14 +20,13 @@ import static org.junit.Assert.assertFalse;
|
|
21
20
|
import static org.junit.Assert.assertTrue;
|
22
21
|
|
23
22
|
import java.util.ArrayList;
|
24
|
-
import java.util.Arrays;
|
25
23
|
import java.util.HashMap;
|
26
24
|
import java.util.HashSet;
|
27
25
|
|
28
26
|
public class TestJsonVisitor
|
29
27
|
{
|
30
28
|
@Rule
|
31
|
-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
29
|
+
public org.embulk.EmbulkTestRuntime runtime = new org.embulk.EmbulkTestRuntime();
|
32
30
|
|
33
31
|
@Before
|
34
32
|
public void createResource()
|
@@ -153,24 +151,24 @@ public class TestJsonVisitor
|
|
153
151
|
}
|
154
152
|
|
155
153
|
@Test(expected = ConfigException.class)
|
156
|
-
public void
|
154
|
+
public void assertDoNotEndsWithArrayWildcard_AddColumns()
|
157
155
|
{
|
158
156
|
PluginTask task = taskFromYamlString(
|
159
157
|
"type: column",
|
160
|
-
"
|
161
|
-
" - {name: \"$.json1.b.b[*]\"}");
|
158
|
+
"add_columns:",
|
159
|
+
" - {name: \"$.json1.b.b[*]\", type: json, default: []}");
|
162
160
|
Schema inputSchema = Schema.builder().build();
|
163
161
|
// b[*] should be written as b
|
164
162
|
jsonVisitor(task, inputSchema);
|
165
163
|
}
|
166
164
|
|
167
165
|
@Test(expected = ConfigException.class)
|
168
|
-
public void
|
166
|
+
public void assertDoNotEndsWithArrayWildcard_Columns()
|
169
167
|
{
|
170
168
|
PluginTask task = taskFromYamlString(
|
171
169
|
"type: column",
|
172
|
-
"
|
173
|
-
" - {name: \"$.json1.b.b[*]\"
|
170
|
+
"columns:",
|
171
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
174
172
|
Schema inputSchema = Schema.builder().build();
|
175
173
|
// b[*] should be written as b
|
176
174
|
jsonVisitor(task, inputSchema);
|
@@ -219,18 +217,6 @@ public class TestJsonVisitor
|
|
219
217
|
}
|
220
218
|
}
|
221
219
|
|
222
|
-
@Test(expected = ConfigException.class)
|
223
|
-
public void buildJsonColumns_ConfigException()
|
224
|
-
{
|
225
|
-
PluginTask task = taskFromYamlString(
|
226
|
-
"type: column",
|
227
|
-
"columns:",
|
228
|
-
" - {name: \"$.json1.b.b[*]\"}");
|
229
|
-
Schema inputSchema = Schema.builder().build();
|
230
|
-
// b[*] should be written as b
|
231
|
-
jsonVisitor(task, inputSchema);
|
232
|
-
}
|
233
|
-
|
234
220
|
@Test
|
235
221
|
public void buildJsonColumns()
|
236
222
|
{
|
@@ -445,6 +431,7 @@ public class TestJsonVisitor
|
|
445
431
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
446
432
|
" - {name: \"$.json1.k2[0]\"}",
|
447
433
|
" - {name: \"$.json1.k3[*].k1\"}",
|
434
|
+
" - {name: \"$.json1.k3[*].k3\", src: \"$.json1.k3[*].k1\"}",
|
448
435
|
" - {name: \"$.json1.k4[*].k1\", type: string, default: v}",
|
449
436
|
" - {name: \"$.json1.k5[0].k1\", type: string, default: v}");
|
450
437
|
Schema inputSchema = Schema.builder()
|
@@ -464,7 +451,7 @@ public class TestJsonVisitor
|
|
464
451
|
k3, ValueFactory.newArray(ValueFactory.newMap(k1, v, k2, v)));
|
465
452
|
|
466
453
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
467
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k1\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString());
|
454
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k1\":\"v\",\"k3\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString());
|
468
455
|
}
|
469
456
|
|
470
457
|
@Test
|
@@ -731,7 +718,8 @@ public class TestJsonVisitor
|
|
731
718
|
}
|
732
719
|
|
733
720
|
@Test(expected = ConfigException.class)
|
734
|
-
public void configException_MultiProperties()
|
721
|
+
public void configException_MultiProperties()
|
722
|
+
{
|
735
723
|
PluginTask task = taskFromYamlString(
|
736
724
|
"type: column",
|
737
725
|
"columns:",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.
|
4
|
+
version: 0.6.0.pre6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -11,33 +11,33 @@ cert_chain: []
|
|
11
11
|
date: 2016-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: A filter plugin for Embulk to filter out columns.
|
42
42
|
email:
|
43
43
|
- sonots@gmail.com
|
@@ -82,7 +82,7 @@ files:
|
|
82
82
|
- classpath/accessors-smart-1.1.jar
|
83
83
|
- classpath/asm-5.0.3.jar
|
84
84
|
- classpath/commons-lang3-3.4.jar
|
85
|
-
- classpath/embulk-filter-column-0.6.0.
|
85
|
+
- classpath/embulk-filter-column-0.6.0.pre6.jar
|
86
86
|
- classpath/json-smart-2.2.1.jar
|
87
87
|
- classpath/JsonPathCompiler-0.0.12.jar
|
88
88
|
- classpath/slf4j-api-1.7.21.jar
|