embulk-filter-column 0.6.0.pre5 → 0.6.0.pre6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +0 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +2 -10
- data/src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java +3 -2
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +3 -3
- data/src/main/java/org/embulk/filter/column/JsonPathUtil.java +2 -0
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +16 -9
- data/src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java +16 -16
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +3 -3
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +11 -23
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28dd927b9569d7dc3b9f9fef47bf6c4d165f0910
|
4
|
+
data.tar.gz: 83e6e55aba543066e42ae4170d75dd081042cec6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22ab4bf44258f1719eb99114a2fa9e9e45c34ef11795769ecfc356bdd46dc8735632045470567823e3b1e5ec8d1ec1ce1149d08472246d1dbcfd3287ab3abfa8
|
7
|
+
data.tar.gz: 792770891fa30db0126b01aff7081076bff3bc129d223529180ad33fe90b0f9c34e0d3e142cf1898cccad96c2bfd44f56a6ff3e66c63f7c2bb8a5ab62b101093
|
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -2,14 +2,13 @@ package org.embulk.filter.column;
|
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
4
|
import com.google.common.collect.ImmutableList;
|
5
|
-
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
6
6
|
import org.embulk.config.Config;
|
7
7
|
import org.embulk.config.ConfigDefault;
|
8
8
|
import org.embulk.config.ConfigException;
|
9
9
|
import org.embulk.config.ConfigSource;
|
10
10
|
import org.embulk.config.Task;
|
11
11
|
import org.embulk.config.TaskSource;
|
12
|
-
|
13
12
|
import org.embulk.spi.Column;
|
14
13
|
import org.embulk.spi.Exec;
|
15
14
|
import org.embulk.spi.FilterPlugin;
|
@@ -21,14 +20,11 @@ import org.embulk.spi.Schema;
|
|
21
20
|
import org.embulk.spi.SchemaConfigException;
|
22
21
|
import org.embulk.spi.time.TimestampParser;
|
23
22
|
import org.embulk.spi.type.Type;
|
24
|
-
|
25
23
|
import org.joda.time.DateTimeZone;
|
26
24
|
import org.slf4j.Logger;
|
27
25
|
|
28
26
|
import java.util.List;
|
29
27
|
|
30
|
-
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
31
|
-
|
32
28
|
public class ColumnFilterPlugin implements FilterPlugin
|
33
29
|
{
|
34
30
|
private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
@@ -95,11 +91,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
95
91
|
|
96
92
|
private void configure(PluginTask task)
|
97
93
|
{
|
98
|
-
|
99
|
-
List<ColumnConfig> addColumns = task.getAddColumns();
|
100
|
-
List<ColumnConfig> dropColumns = task.getDropColumns();
|
101
|
-
|
102
|
-
if (columns.size() > 0 && dropColumns.size() > 0) {
|
94
|
+
if (task.getColumns().size() > 0 && task.getDropColumns().size() > 0) {
|
103
95
|
throw new ConfigException("Either of \"columns\", \"drop_columns\" can be specified.");
|
104
96
|
}
|
105
97
|
}
|
@@ -1,8 +1,9 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
6
|
+
|
6
7
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
7
8
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
8
9
|
|
@@ -126,7 +127,7 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
126
127
|
}
|
127
128
|
else if (type instanceof LongType) {
|
128
129
|
if (columnConfig.getDefault().isPresent()) {
|
129
|
-
return
|
130
|
+
return Long.valueOf(columnConfig.getDefault().get().toString());
|
130
131
|
}
|
131
132
|
}
|
132
133
|
else if (type instanceof DoubleType) {
|
@@ -5,8 +5,8 @@ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
|
5
5
|
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
6
|
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
7
|
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
8
|
-
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
9
8
|
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
10
10
|
import io.github.medjed.jsonpathcompiler.expressions.path.WildcardPathToken;
|
11
11
|
import org.embulk.config.ConfigException;
|
12
12
|
import org.embulk.spi.type.Type;
|
@@ -33,7 +33,7 @@ public class JsonColumn
|
|
33
33
|
private StringValue srcParentPathValue = null;
|
34
34
|
private Value srcTailNameValue = null;
|
35
35
|
|
36
|
-
public static int WILDCARD_INDEX = -1;
|
36
|
+
public static final int WILDCARD_INDEX = -1;
|
37
37
|
|
38
38
|
public JsonColumn(String path, Type type)
|
39
39
|
{
|
@@ -101,7 +101,7 @@ public class JsonColumn
|
|
101
101
|
return arrayIndexOperation.indexes().get(0).longValue();
|
102
102
|
}
|
103
103
|
else if (tail instanceof WildcardPathToken) {
|
104
|
-
return
|
104
|
+
return Long.valueOf(WILDCARD_INDEX);
|
105
105
|
}
|
106
106
|
else {
|
107
107
|
return null;
|
@@ -337,19 +337,26 @@ public class JsonVisitor
|
|
337
337
|
}
|
338
338
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
339
339
|
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
340
|
-
int
|
341
|
-
if (
|
342
|
-
for (
|
343
|
-
|
344
|
-
if (
|
345
|
-
v =
|
340
|
+
int i = jsonColumn.getTailIndex().intValue();
|
341
|
+
if (i == JsonColumn.WILDCARD_INDEX) {
|
342
|
+
for (i = 0; i < size; i++) {
|
343
|
+
int src = jsonColumn.getSrcTailIndex().intValue();
|
344
|
+
if (src == JsonColumn.WILDCARD_INDEX) {
|
345
|
+
Value v = arrayValue.get(i);
|
346
|
+
if (v == null) {
|
347
|
+
v = jsonColumn.getDefaultValue();
|
348
|
+
}
|
349
|
+
String newPath = jsonColumn.getPath(); // == newArrayJsonPath(rootPath, i); // [*]
|
350
|
+
Value visited = visit(newPath, v);
|
351
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
352
|
+
}
|
353
|
+
else {
|
354
|
+
assert (false); // not supported yet
|
346
355
|
}
|
347
|
-
String newPath = jsonColumn.getPath();
|
348
|
-
Value visited = visit(newPath, v);
|
349
|
-
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
350
356
|
}
|
351
357
|
}
|
352
358
|
else {
|
359
|
+
int src = jsonColumn.getSrcTailIndex().intValue();
|
353
360
|
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
354
361
|
if (v == null) {
|
355
362
|
v = jsonColumn.getDefaultValue();
|
@@ -100,8 +100,8 @@ public class TestColumnVisitorImpl
|
|
100
100
|
.add("remove_me", STRING)
|
101
101
|
.build();
|
102
102
|
List<Object[]> records = filter(task, inputSchema,
|
103
|
-
Timestamp.ofEpochSecond(0), "string",
|
104
|
-
Timestamp.ofEpochSecond(0), "string",
|
103
|
+
Timestamp.ofEpochSecond(0), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
|
104
|
+
Timestamp.ofEpochSecond(0), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me");
|
105
105
|
|
106
106
|
assertEquals(2, records.size());
|
107
107
|
|
@@ -111,9 +111,9 @@ public class TestColumnVisitorImpl
|
|
111
111
|
assertEquals(6, record.length);
|
112
112
|
assertEquals(Timestamp.ofEpochSecond(0), record[0]);
|
113
113
|
assertEquals("string", record[1]);
|
114
|
-
assertEquals(
|
115
|
-
assertEquals(
|
116
|
-
assertEquals(
|
114
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
115
|
+
assertEquals(Long.valueOf(0), record[3]);
|
116
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
117
117
|
assertEquals(ValueFactory.newString("json"), record[5]);
|
118
118
|
}
|
119
119
|
}
|
@@ -140,7 +140,7 @@ public class TestColumnVisitorImpl
|
|
140
140
|
.add("remove_me", STRING)
|
141
141
|
.build();
|
142
142
|
List<Object[]> records = filter(task, inputSchema,
|
143
|
-
Timestamp.ofEpochSecond(1436745600), "string",
|
143
|
+
Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
|
144
144
|
null, null, null, null, null, null, "remove_me");
|
145
145
|
|
146
146
|
assertEquals(2, records.size());
|
@@ -151,9 +151,9 @@ public class TestColumnVisitorImpl
|
|
151
151
|
assertEquals(6, record.length);
|
152
152
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[0]);
|
153
153
|
assertEquals("string", record[1]);
|
154
|
-
assertEquals(
|
155
|
-
assertEquals(
|
156
|
-
assertEquals(
|
154
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
155
|
+
assertEquals(Long.valueOf(0), record[3]);
|
156
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
157
157
|
assertEquals(ValueFactory.newString("json"), record[5]);
|
158
158
|
}
|
159
159
|
{
|
@@ -161,9 +161,9 @@ public class TestColumnVisitorImpl
|
|
161
161
|
assertEquals(6, record.length);
|
162
162
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[0]);
|
163
163
|
assertEquals("string", record[1]);
|
164
|
-
assertEquals(
|
165
|
-
assertEquals(
|
166
|
-
assertEquals(
|
164
|
+
assertEquals(Boolean.valueOf(true), record[2]);
|
165
|
+
assertEquals(Long.valueOf(0), record[3]);
|
166
|
+
assertEquals(Double.valueOf(0.5), record[4]);
|
167
167
|
assertEquals("{\"foo\":\"bar\"}", record[5].toString());
|
168
168
|
}
|
169
169
|
}
|
@@ -213,7 +213,7 @@ public class TestColumnVisitorImpl
|
|
213
213
|
.add("keep_me", STRING)
|
214
214
|
.build();
|
215
215
|
List<Object[]> records = filter(task, inputSchema,
|
216
|
-
Timestamp.ofEpochSecond(1436745600), "string",
|
216
|
+
Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "keep_me",
|
217
217
|
null, null, null, null, null, null, "keep_me");
|
218
218
|
|
219
219
|
assertEquals(2, records.size());
|
@@ -259,9 +259,9 @@ public class TestColumnVisitorImpl
|
|
259
259
|
assertEquals("keep_me", record[0]);
|
260
260
|
assertEquals(Timestamp.ofEpochSecond(1436745600), record[1]);
|
261
261
|
assertEquals("string", record[2]);
|
262
|
-
assertEquals(
|
263
|
-
assertEquals(
|
264
|
-
assertEquals(
|
262
|
+
assertEquals(Boolean.valueOf(true), record[3]);
|
263
|
+
assertEquals(Long.valueOf(0), record[4]);
|
264
|
+
assertEquals(Double.valueOf(0.5), record[5]);
|
265
265
|
assertEquals("{\"foo\":\"bar\"}", record[6].toString());
|
266
266
|
}
|
267
267
|
}
|
@@ -72,8 +72,8 @@ public class TestJsonColumn
|
|
72
72
|
assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
|
73
73
|
assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
|
74
74
|
assertEquals(null, JsonColumn.getTailIndex("$.foo"));
|
75
|
-
assertEquals(
|
76
|
-
assertEquals(
|
77
|
-
assertEquals(
|
75
|
+
assertEquals(Long.valueOf(1), JsonColumn.getTailIndex("$.foo[0][1]"));
|
76
|
+
assertEquals(Long.valueOf(0), JsonColumn.getTailIndex("$.foo[0]"));
|
77
|
+
assertEquals(Long.valueOf(0), JsonColumn.getTailIndex("$[0]"));
|
78
78
|
}
|
79
79
|
}
|
@@ -1,7 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
3
|
import org.embulk.config.ConfigException;
|
4
|
-
import org.embulk.EmbulkTestRuntime;
|
5
4
|
import org.embulk.config.ConfigLoader;
|
6
5
|
import org.embulk.config.ConfigSource;
|
7
6
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
@@ -21,14 +20,13 @@ import static org.junit.Assert.assertFalse;
|
|
21
20
|
import static org.junit.Assert.assertTrue;
|
22
21
|
|
23
22
|
import java.util.ArrayList;
|
24
|
-
import java.util.Arrays;
|
25
23
|
import java.util.HashMap;
|
26
24
|
import java.util.HashSet;
|
27
25
|
|
28
26
|
public class TestJsonVisitor
|
29
27
|
{
|
30
28
|
@Rule
|
31
|
-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
29
|
+
public org.embulk.EmbulkTestRuntime runtime = new org.embulk.EmbulkTestRuntime();
|
32
30
|
|
33
31
|
@Before
|
34
32
|
public void createResource()
|
@@ -153,24 +151,24 @@ public class TestJsonVisitor
|
|
153
151
|
}
|
154
152
|
|
155
153
|
@Test(expected = ConfigException.class)
|
156
|
-
public void
|
154
|
+
public void assertDoNotEndsWithArrayWildcard_AddColumns()
|
157
155
|
{
|
158
156
|
PluginTask task = taskFromYamlString(
|
159
157
|
"type: column",
|
160
|
-
"
|
161
|
-
" - {name: \"$.json1.b.b[*]\"}");
|
158
|
+
"add_columns:",
|
159
|
+
" - {name: \"$.json1.b.b[*]\", type: json, default: []}");
|
162
160
|
Schema inputSchema = Schema.builder().build();
|
163
161
|
// b[*] should be written as b
|
164
162
|
jsonVisitor(task, inputSchema);
|
165
163
|
}
|
166
164
|
|
167
165
|
@Test(expected = ConfigException.class)
|
168
|
-
public void
|
166
|
+
public void assertDoNotEndsWithArrayWildcard_Columns()
|
169
167
|
{
|
170
168
|
PluginTask task = taskFromYamlString(
|
171
169
|
"type: column",
|
172
|
-
"
|
173
|
-
" - {name: \"$.json1.b.b[*]\"
|
170
|
+
"columns:",
|
171
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
174
172
|
Schema inputSchema = Schema.builder().build();
|
175
173
|
// b[*] should be written as b
|
176
174
|
jsonVisitor(task, inputSchema);
|
@@ -219,18 +217,6 @@ public class TestJsonVisitor
|
|
219
217
|
}
|
220
218
|
}
|
221
219
|
|
222
|
-
@Test(expected = ConfigException.class)
|
223
|
-
public void buildJsonColumns_ConfigException()
|
224
|
-
{
|
225
|
-
PluginTask task = taskFromYamlString(
|
226
|
-
"type: column",
|
227
|
-
"columns:",
|
228
|
-
" - {name: \"$.json1.b.b[*]\"}");
|
229
|
-
Schema inputSchema = Schema.builder().build();
|
230
|
-
// b[*] should be written as b
|
231
|
-
jsonVisitor(task, inputSchema);
|
232
|
-
}
|
233
|
-
|
234
220
|
@Test
|
235
221
|
public void buildJsonColumns()
|
236
222
|
{
|
@@ -445,6 +431,7 @@ public class TestJsonVisitor
|
|
445
431
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
446
432
|
" - {name: \"$.json1.k2[0]\"}",
|
447
433
|
" - {name: \"$.json1.k3[*].k1\"}",
|
434
|
+
" - {name: \"$.json1.k3[*].k3\", src: \"$.json1.k3[*].k1\"}",
|
448
435
|
" - {name: \"$.json1.k4[*].k1\", type: string, default: v}",
|
449
436
|
" - {name: \"$.json1.k5[0].k1\", type: string, default: v}");
|
450
437
|
Schema inputSchema = Schema.builder()
|
@@ -464,7 +451,7 @@ public class TestJsonVisitor
|
|
464
451
|
k3, ValueFactory.newArray(ValueFactory.newMap(k1, v, k2, v)));
|
465
452
|
|
466
453
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
467
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k1\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString());
|
454
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k1\":\"v\",\"k3\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString());
|
468
455
|
}
|
469
456
|
|
470
457
|
@Test
|
@@ -731,7 +718,8 @@ public class TestJsonVisitor
|
|
731
718
|
}
|
732
719
|
|
733
720
|
@Test(expected = ConfigException.class)
|
734
|
-
public void configException_MultiProperties()
|
721
|
+
public void configException_MultiProperties()
|
722
|
+
{
|
735
723
|
PluginTask task = taskFromYamlString(
|
736
724
|
"type: column",
|
737
725
|
"columns:",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.
|
4
|
+
version: 0.6.0.pre6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -11,33 +11,33 @@ cert_chain: []
|
|
11
11
|
date: 2016-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: A filter plugin for Embulk to filter out columns.
|
42
42
|
email:
|
43
43
|
- sonots@gmail.com
|
@@ -82,7 +82,7 @@ files:
|
|
82
82
|
- classpath/accessors-smart-1.1.jar
|
83
83
|
- classpath/asm-5.0.3.jar
|
84
84
|
- classpath/commons-lang3-3.4.jar
|
85
|
-
- classpath/embulk-filter-column-0.6.0.
|
85
|
+
- classpath/embulk-filter-column-0.6.0.pre6.jar
|
86
86
|
- classpath/json-smart-2.2.1.jar
|
87
87
|
- classpath/JsonPathCompiler-0.0.12.jar
|
88
88
|
- classpath/slf4j-api-1.7.21.jar
|