embulk-filter-column 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +8 -0
- data/build.gradle +7 -4
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +1 -1
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +41 -4
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +48 -30
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +57 -0
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +408 -0
- metadata +5 -4
- data/src/test/java/org/embulk/filter/TestColumnFilterPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1faf79742c3c3eefadac1c9c31884e335df93761
|
4
|
+
data.tar.gz: 1b3b85f3b54c51bcc26b8dde92f6c7e8c5728238
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a5cbcd1e4b1dbf79fcdd66dc5220f6e9023c9416880824fa4f3de0d13b3685e8243a3104bb427f0c222eb7c1ac17f021786aaf33b9564ee5c487d9218faafa3
|
7
|
+
data.tar.gz: 181ea5b27402a4d95482fade22bba2a136eb69d54808289a44b4790bcfb1d345e402b063663df36f45180082a39fa318e8cda82eef2192f7b16b9bc528c6a428
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -3,6 +3,7 @@ plugins {
|
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
|
+
id "jacoco"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
8
9
|
repositories {
|
@@ -13,15 +14,17 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.5.0"
|
17
|
+
version = "0.5.1"
|
17
18
|
sourceCompatibility = 1.7
|
18
19
|
targetCompatibility = 1.7
|
19
20
|
|
20
21
|
dependencies {
|
21
|
-
compile "org.embulk:embulk-core:0.8
|
22
|
-
provided "org.embulk:embulk-core:0.8
|
23
|
-
|
22
|
+
compile "org.embulk:embulk-core:0.8.+"
|
23
|
+
provided "org.embulk:embulk-core:0.8.+"
|
24
|
+
|
24
25
|
testCompile "junit:junit:4.+"
|
26
|
+
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
27
|
+
testCompile "org.embulk:embulk-standards:0.8.+"
|
25
28
|
}
|
26
29
|
|
27
30
|
checkstyle {
|
@@ -106,7 +106,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
106
106
|
}
|
107
107
|
}
|
108
108
|
|
109
|
-
|
109
|
+
static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
110
110
|
{
|
111
111
|
List<ColumnConfig> columns = task.getColumns();
|
112
112
|
List<ColumnConfig> addColumns = task.getAddColumns();
|
@@ -2,6 +2,7 @@ package org.embulk.filter.column;
|
|
2
2
|
|
3
3
|
import org.embulk.config.ConfigException;
|
4
4
|
import org.embulk.spi.type.Type;
|
5
|
+
import org.msgpack.value.IntegerValue;
|
5
6
|
import org.msgpack.value.StringValue;
|
6
7
|
import org.msgpack.value.Value;
|
7
8
|
import org.msgpack.value.ValueFactory;
|
@@ -16,12 +17,14 @@ public class JsonColumn
|
|
16
17
|
private StringValue pathValue = null;
|
17
18
|
private String parentPath = null;
|
18
19
|
private String baseName = null;
|
20
|
+
private Long baseIndex = null;
|
19
21
|
private StringValue parentPathValue = null;
|
20
22
|
private StringValue baseNameValue = null;
|
21
23
|
|
22
24
|
private StringValue srcValue = null;
|
23
25
|
private String srcParentPath = null;
|
24
26
|
private String srcBaseName = null;
|
27
|
+
private Long srcBaseIndex = null;
|
25
28
|
private StringValue srcParentPathValue = null;
|
26
29
|
private StringValue srcBaseNameValue = null;
|
27
30
|
|
@@ -45,12 +48,14 @@ public class JsonColumn
|
|
45
48
|
this.pathValue = ValueFactory.newString(path);
|
46
49
|
this.parentPath = parentPath(path);
|
47
50
|
this.baseName = baseName(path);
|
51
|
+
this.baseIndex = baseIndex(path);
|
48
52
|
this.parentPathValue = ValueFactory.newString(parentPath);
|
49
53
|
this.baseNameValue = ValueFactory.newString(baseName);
|
50
54
|
|
51
55
|
this.srcValue = ValueFactory.newString(this.src);
|
52
56
|
this.srcParentPath = parentPath(this.src);
|
53
57
|
this.srcBaseName = baseName(this.src);
|
58
|
+
this.srcBaseIndex = baseIndex(this.src);
|
54
59
|
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
55
60
|
this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
|
56
61
|
|
@@ -94,6 +99,11 @@ public class JsonColumn
|
|
94
99
|
return baseName;
|
95
100
|
}
|
96
101
|
|
102
|
+
public Long getBaseIndex()
|
103
|
+
{
|
104
|
+
return baseIndex;
|
105
|
+
}
|
106
|
+
|
97
107
|
public StringValue getParentPathValue()
|
98
108
|
{
|
99
109
|
return parentPathValue;
|
@@ -119,6 +129,11 @@ public class JsonColumn
|
|
119
129
|
return srcBaseName;
|
120
130
|
}
|
121
131
|
|
132
|
+
public Long getSrcBaseIndex()
|
133
|
+
{
|
134
|
+
return srcBaseIndex;
|
135
|
+
}
|
136
|
+
|
122
137
|
public StringValue getSrcParentPathValue()
|
123
138
|
{
|
124
139
|
return srcParentPathValue;
|
@@ -134,8 +149,7 @@ public class JsonColumn
|
|
134
149
|
{
|
135
150
|
String[] parts = path.split("\\.");
|
136
151
|
StringBuilder builder = new StringBuilder();
|
137
|
-
|
138
|
-
for (int i = 1; i < parts.length - 1; i++) {
|
152
|
+
for (int i = 0; i < parts.length - 1; i++) {
|
139
153
|
builder.append(".").append(parts[i]);
|
140
154
|
}
|
141
155
|
if (parts[parts.length - 1].contains("[")) {
|
@@ -145,12 +159,35 @@ public class JsonColumn
|
|
145
159
|
builder.append("[").append(arrayParts[j]);
|
146
160
|
}
|
147
161
|
}
|
148
|
-
return builder.toString();
|
162
|
+
return builder.deleteCharAt(0).toString();
|
149
163
|
}
|
150
164
|
|
151
165
|
public static String baseName(String path)
|
152
166
|
{
|
153
167
|
String[] parts = path.split("\\.");
|
154
|
-
|
168
|
+
String[] arrayParts = parts[parts.length - 1].split("\\[");
|
169
|
+
if (arrayParts.length == 1) { // no [i]
|
170
|
+
return arrayParts[arrayParts.length - 1];
|
171
|
+
}
|
172
|
+
else {
|
173
|
+
return "[" + arrayParts[arrayParts.length - 1];
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
public static Long baseIndex(String path)
|
178
|
+
{
|
179
|
+
String baseName = baseName(path);
|
180
|
+
if (baseName.startsWith("[") && baseName.endsWith("]")) {
|
181
|
+
String baseIndex = baseName.substring(1, baseName.length() - 1);
|
182
|
+
try {
|
183
|
+
return Long.parseLong(baseIndex);
|
184
|
+
}
|
185
|
+
catch (NumberFormatException e) {
|
186
|
+
return null;
|
187
|
+
}
|
188
|
+
}
|
189
|
+
else {
|
190
|
+
return null;
|
191
|
+
}
|
155
192
|
}
|
156
193
|
}
|
@@ -30,14 +30,14 @@ import java.util.Map;
|
|
30
30
|
|
31
31
|
public class JsonVisitor
|
32
32
|
{
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
34
|
+
final PluginTask task;
|
35
|
+
final Schema inputSchema;
|
36
|
+
final Schema outputSchema;
|
37
|
+
final HashSet<String> shouldVisitSet = new HashSet<>();
|
38
|
+
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
|
39
|
+
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
|
40
|
+
final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
|
41
41
|
|
42
42
|
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
43
43
|
{
|
@@ -132,7 +132,7 @@ public class JsonVisitor
|
|
132
132
|
}
|
133
133
|
if (column.getSrc().isPresent()) {
|
134
134
|
String src = column.getSrc().get();
|
135
|
-
|
135
|
+
jsonColumnsPut(name, new JsonColumn(name, null, null, src));
|
136
136
|
}
|
137
137
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
138
138
|
Type type = column.getType().get();
|
@@ -190,7 +190,7 @@ public class JsonVisitor
|
|
190
190
|
partialPath.append(".").append(arrayParts[0]);
|
191
191
|
this.shouldVisitSet.add(partialPath.toString());
|
192
192
|
for (int j = 1; j < arrayParts.length; j++) {
|
193
|
-
//
|
193
|
+
// Simply add [0] or [*] here
|
194
194
|
partialPath.append("[").append(arrayParts[j]);
|
195
195
|
this.shouldVisitSet.add(partialPath.toString());
|
196
196
|
}
|
@@ -234,28 +234,43 @@ public class JsonVisitor
|
|
234
234
|
for (int i = 0; i < size; i++) {
|
235
235
|
String newPath = newArrayJsonPath(rootPath, i);
|
236
236
|
if (! jsonDropColumns.contains(newPath)) {
|
237
|
-
|
237
|
+
Value v = arrayValue.get(i);
|
238
|
+
newValue.add(j++, visit(newPath, v));
|
238
239
|
}
|
239
240
|
}
|
240
241
|
}
|
241
242
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
if (
|
246
|
-
|
243
|
+
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
244
|
+
int src = jsonColumn.getSrcBaseIndex().intValue();
|
245
|
+
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
246
|
+
if (v == null) {
|
247
|
+
v = jsonColumn.getDefaultValue();
|
247
248
|
}
|
249
|
+
String newPath = jsonColumn.getPath();
|
250
|
+
Value visited = visit(newPath, v);
|
251
|
+
// int i = jsonColumn.getBaseIndex().intValue();
|
252
|
+
// index is shifted, so j++ is used.
|
253
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
248
254
|
}
|
249
255
|
}
|
250
256
|
else {
|
251
257
|
for (int i = 0; i < size; i++) {
|
252
258
|
String newPath = newArrayJsonPath(rootPath, i);
|
253
|
-
|
259
|
+
Value v = arrayValue.get(i);
|
260
|
+
newValue.add(j++, visit(newPath, v));
|
254
261
|
}
|
255
262
|
}
|
256
263
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
257
264
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
258
|
-
|
265
|
+
int src = jsonColumn.getSrcBaseIndex().intValue();
|
266
|
+
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
267
|
+
if (v == null) {
|
268
|
+
v = jsonColumn.getDefaultValue();
|
269
|
+
}
|
270
|
+
String newPath = jsonColumn.getPath();
|
271
|
+
Value visited = visit(newPath, v);
|
272
|
+
// the specified index is ignored for now; the value is appended to the end
|
273
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
259
274
|
}
|
260
275
|
}
|
261
276
|
return ValueFactory.newArray(newValue.toArray(new Value[0]), true);
|
@@ -281,17 +296,16 @@ public class JsonVisitor
|
|
281
296
|
}
|
282
297
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
283
298
|
Map<Value, Value> map = mapValue.map();
|
284
|
-
|
285
|
-
for (JsonColumn jsonColumn : jsonColumns.values()) {
|
299
|
+
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
286
300
|
Value src = jsonColumn.getSrcBaseNameValue();
|
287
301
|
Value v = map.get(src);
|
302
|
+
if (v == null) {
|
303
|
+
v = jsonColumn.getDefaultValue();
|
304
|
+
}
|
288
305
|
String newPath = jsonColumn.getPath();
|
289
306
|
Value visited = visit(newPath, v);
|
290
|
-
|
291
|
-
|
292
|
-
}
|
293
|
-
newValue.add(i++, jsonColumn.getPathValue());
|
294
|
-
newValue.add(i++, visited);
|
307
|
+
newValue.add(i++, jsonColumn.getBaseNameValue());
|
308
|
+
newValue.add(i++, visited == null ? ValueFactory.newNil() : visited);
|
295
309
|
}
|
296
310
|
}
|
297
311
|
else {
|
@@ -306,15 +320,16 @@ public class JsonVisitor
|
|
306
320
|
}
|
307
321
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
308
322
|
Map<Value, Value> map = mapValue.map();
|
309
|
-
|
310
|
-
for (JsonColumn jsonColumn : jsonAddColumns.values()) {
|
323
|
+
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
311
324
|
Value src = jsonColumn.getSrcBaseNameValue();
|
312
325
|
Value v = map.get(src);
|
313
326
|
if (v == null) {
|
314
327
|
v = jsonColumn.getDefaultValue();
|
315
328
|
}
|
316
|
-
|
317
|
-
|
329
|
+
String newPath = jsonColumn.getPath();
|
330
|
+
Value visited = visit(newPath, v);
|
331
|
+
newValue.add(i++, jsonColumn.getBaseNameValue());
|
332
|
+
newValue.add(i++, visited == null ? ValueFactory.newNil() : visited);
|
318
333
|
}
|
319
334
|
}
|
320
335
|
return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
|
@@ -325,7 +340,10 @@ public class JsonVisitor
|
|
325
340
|
if (! shouldVisit(rootPath)) {
|
326
341
|
return value;
|
327
342
|
}
|
328
|
-
if (value
|
343
|
+
if (value == null) {
|
344
|
+
return null;
|
345
|
+
}
|
346
|
+
else if (value.isArrayValue()) {
|
329
347
|
return visitArray(rootPath, value.asArrayValue());
|
330
348
|
}
|
331
349
|
else if (value.isMapValue()) {
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.junit.Test;
|
4
|
+
|
5
|
+
import static org.junit.Assert.assertEquals;
|
6
|
+
import static org.junit.Assert.fail;
|
7
|
+
|
8
|
+
import org.embulk.spi.type.Types;
|
9
|
+
import org.msgpack.value.Value;
|
10
|
+
import org.msgpack.value.ValueFactory;
|
11
|
+
|
12
|
+
public class TestJsonColumn {
|
13
|
+
@Test
|
14
|
+
public void initialize()
|
15
|
+
{
|
16
|
+
try {
|
17
|
+
JsonColumn column = new JsonColumn("$.foo.bar", Types.BOOLEAN);
|
18
|
+
assertEquals("$.foo.bar", column.getSrc());
|
19
|
+
assertEquals(ValueFactory.newNil(), column.getDefaultValue());
|
20
|
+
}
|
21
|
+
catch (Exception e) {
|
22
|
+
fail();
|
23
|
+
}
|
24
|
+
|
25
|
+
try {
|
26
|
+
Value defaultValue = ValueFactory.newBoolean(true);
|
27
|
+
JsonColumn column = new JsonColumn("$.foo.bar", Types.BOOLEAN, defaultValue);
|
28
|
+
assertEquals("$.foo.bar", column.getSrc());
|
29
|
+
assertEquals(defaultValue, column.getDefaultValue());
|
30
|
+
}
|
31
|
+
catch (Exception e) {
|
32
|
+
fail();
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
@Test
|
37
|
+
public void parentPath()
|
38
|
+
{
|
39
|
+
assertEquals("$.foo.bar", JsonColumn.parentPath("$.foo.bar.baz"));
|
40
|
+
assertEquals("$.foo", JsonColumn.parentPath("$.foo.bar"));
|
41
|
+
assertEquals("$", JsonColumn.parentPath("$.foo"));
|
42
|
+
assertEquals("$.foo[0]", JsonColumn.parentPath("$.foo[0][1]"));
|
43
|
+
assertEquals("$.foo", JsonColumn.parentPath("$.foo[0]"));
|
44
|
+
assertEquals("$", JsonColumn.parentPath("$[0]"));
|
45
|
+
}
|
46
|
+
|
47
|
+
@Test
|
48
|
+
public void baseName()
|
49
|
+
{
|
50
|
+
assertEquals("baz", JsonColumn.baseName("$.foo.bar.baz"));
|
51
|
+
assertEquals("bar", JsonColumn.baseName("$.foo.bar"));
|
52
|
+
assertEquals("foo", JsonColumn.baseName("$.foo"));
|
53
|
+
assertEquals("[1]", JsonColumn.baseName("$.foo[0][1]"));
|
54
|
+
assertEquals("[0]", JsonColumn.baseName("$.foo[0]"));
|
55
|
+
assertEquals("[0]", JsonColumn.baseName("$[0]"));
|
56
|
+
}
|
57
|
+
}
|
@@ -0,0 +1,408 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
6
|
+
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
7
|
+
|
8
|
+
import org.embulk.EmbulkTestRuntime;
|
9
|
+
import org.embulk.config.ConfigLoader;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
import org.embulk.config.TaskSource;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.Exec;
|
14
|
+
import org.embulk.spi.FileInput;
|
15
|
+
import org.embulk.spi.ParserPlugin;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.SchemaConfig;
|
18
|
+
import org.embulk.spi.type.Type;
|
19
|
+
import org.joda.time.DateTimeZone;
|
20
|
+
import org.junit.Before;
|
21
|
+
import org.junit.Rule;
|
22
|
+
import org.junit.Test;
|
23
|
+
|
24
|
+
import static junit.framework.TestCase.assertFalse;
|
25
|
+
import static org.junit.Assert.assertEquals;
|
26
|
+
import static org.junit.Assert.fail;
|
27
|
+
|
28
|
+
import org.msgpack.value.MapValue;
|
29
|
+
import org.msgpack.value.Value;
|
30
|
+
import org.msgpack.value.ValueFactory;
|
31
|
+
|
32
|
+
import static org.embulk.spi.type.Types.BOOLEAN;
|
33
|
+
import static org.embulk.spi.type.Types.DOUBLE;
|
34
|
+
import static org.embulk.spi.type.Types.JSON;
|
35
|
+
import static org.embulk.spi.type.Types.LONG;
|
36
|
+
import static org.embulk.spi.type.Types.STRING;
|
37
|
+
import static org.embulk.spi.type.Types.TIMESTAMP;
|
38
|
+
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertNull;
|
40
|
+
import static org.junit.Assert.assertTrue;
|
41
|
+
import static org.junit.Assert.fail;
|
42
|
+
|
43
|
+
import java.io.File;
|
44
|
+
import java.io.IOException;
|
45
|
+
import java.util.ArrayList;
|
46
|
+
import java.util.HashMap;
|
47
|
+
import java.util.HashSet;
|
48
|
+
import java.util.List;
|
49
|
+
|
50
|
+
public class TestJsonVisitor {
|
51
|
+
@Rule
|
52
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
53
|
+
|
54
|
+
@Before
|
55
|
+
public void createReasource()
|
56
|
+
{
|
57
|
+
// config = config().set("type", "column");
|
58
|
+
}
|
59
|
+
|
60
|
+
private ConfigSource config()
|
61
|
+
{
|
62
|
+
return runtime.getExec().newConfigSource();
|
63
|
+
}
|
64
|
+
|
65
|
+
private Schema schema(Column... columns)
|
66
|
+
{
|
67
|
+
return new Schema(Lists.newArrayList(columns));
|
68
|
+
}
|
69
|
+
|
70
|
+
private ConfigSource configFromYamlString(String... lines)
|
71
|
+
{
|
72
|
+
StringBuilder builder = new StringBuilder();
|
73
|
+
for (String line : lines) {
|
74
|
+
builder.append(line).append("\n");
|
75
|
+
}
|
76
|
+
String yamlString = builder.toString();
|
77
|
+
|
78
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
79
|
+
return loader.fromYamlString(yamlString);
|
80
|
+
}
|
81
|
+
|
82
|
+
private PluginTask taskFromYamlString(String... lines)
|
83
|
+
{
|
84
|
+
ConfigSource config = configFromYamlString(lines);
|
85
|
+
return config.loadConfig(PluginTask.class);
|
86
|
+
}
|
87
|
+
|
88
|
+
private JsonVisitor jsonVisitor(PluginTask task, Schema inputSchema)
|
89
|
+
{
|
90
|
+
Schema outputSchema = ColumnFilterPlugin.buildOutputSchema(task, inputSchema);
|
91
|
+
return new JsonVisitor(task, inputSchema, outputSchema);
|
92
|
+
}
|
93
|
+
|
94
|
+
@Test
|
95
|
+
public void buildShouldVisitSet()
|
96
|
+
{
|
97
|
+
PluginTask task = taskFromYamlString(
|
98
|
+
"type: column",
|
99
|
+
"columns:",
|
100
|
+
" - {name: \"$.json1.a.a.a\"}",
|
101
|
+
"add_columns:",
|
102
|
+
" - {name: \"$.json1.b.b[1].b\", type: string, default: foo}",
|
103
|
+
"drop_columns:",
|
104
|
+
" - {name: \"$.json1.c.c[*].c\"}");
|
105
|
+
Schema inputSchema = schema(
|
106
|
+
new Column(0, "json1", JSON),
|
107
|
+
new Column(1, "json2", JSON));
|
108
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
109
|
+
|
110
|
+
assertTrue(subject.shouldVisit("$.json1.a.a.a"));
|
111
|
+
assertTrue(subject.shouldVisit("$.json1.a.a"));
|
112
|
+
assertTrue(subject.shouldVisit("$.json1.a"));
|
113
|
+
assertTrue(subject.shouldVisit("$.json1.b.b[1].b"));
|
114
|
+
assertTrue(subject.shouldVisit("$.json1.b.b[1]"));
|
115
|
+
assertTrue(subject.shouldVisit("$.json1.b.b"));
|
116
|
+
assertTrue(subject.shouldVisit("$.json1.b"));
|
117
|
+
assertTrue(subject.shouldVisit("$.json1.c.c[*].c"));
|
118
|
+
assertTrue(subject.shouldVisit("$.json1.c.c[*]"));
|
119
|
+
assertTrue(subject.shouldVisit("$.json1.c.c"));
|
120
|
+
assertTrue(subject.shouldVisit("$.json1.c"));
|
121
|
+
assertTrue(subject.shouldVisit("$.json1"));
|
122
|
+
assertFalse(subject.shouldVisit("$.json2"));
|
123
|
+
}
|
124
|
+
|
125
|
+
@Test
|
126
|
+
public void buildJsonSchema_DropColumns()
|
127
|
+
{
|
128
|
+
PluginTask task = taskFromYamlString(
|
129
|
+
"type: column",
|
130
|
+
"drop_columns:",
|
131
|
+
" - {name: $.json1.a.default}",
|
132
|
+
" - {name: $.json1.a.copy}",
|
133
|
+
" - {name: \"$.json1.a.copy_array[1]\"}");
|
134
|
+
Schema inputSchema = schema(
|
135
|
+
new Column(0, "json1", JSON),
|
136
|
+
new Column(1, "json2", JSON));
|
137
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
138
|
+
|
139
|
+
assertFalse(subject.jsonDropColumns.containsKey("$.json1"));
|
140
|
+
assertTrue(subject.jsonDropColumns.containsKey("$.json1.a"));
|
141
|
+
assertTrue(subject.jsonDropColumns.containsKey("$.json1.a.copy_array"));
|
142
|
+
|
143
|
+
{
|
144
|
+
HashSet<String> jsonColumns = subject.jsonDropColumns.get("$.json1.a");
|
145
|
+
assertEquals(2, jsonColumns.size());
|
146
|
+
assertTrue(jsonColumns.contains("$.json1.a.default"));
|
147
|
+
assertTrue(jsonColumns.contains("$.json1.a.copy"));
|
148
|
+
}
|
149
|
+
|
150
|
+
{
|
151
|
+
HashSet<String> jsonColumns = subject.jsonDropColumns.get("$.json1.a.copy_array");
|
152
|
+
assertEquals(1, jsonColumns.size());
|
153
|
+
assertTrue(jsonColumns.contains("$.json1.a.copy_array[1]"));
|
154
|
+
}
|
155
|
+
}
|
156
|
+
|
157
|
+
@Test
|
158
|
+
public void buildJsonSchema_AddColumns()
|
159
|
+
{
|
160
|
+
PluginTask task = taskFromYamlString(
|
161
|
+
"type: column",
|
162
|
+
"add_columns:",
|
163
|
+
" - {name: $.json1.a.default, type: string, default: foo}",
|
164
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
165
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
166
|
+
Schema inputSchema = schema(
|
167
|
+
new Column(0, "json1", JSON),
|
168
|
+
new Column(1, "json2", JSON));
|
169
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
170
|
+
|
171
|
+
assertFalse(subject.jsonAddColumns.containsKey("$.json1"));
|
172
|
+
assertTrue(subject.jsonAddColumns.containsKey("$.json1.a"));
|
173
|
+
assertTrue(subject.jsonAddColumns.containsKey("$.json1.a.copy_array"));
|
174
|
+
|
175
|
+
{
|
176
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$.json1.a");
|
177
|
+
assertEquals(2, jsonColumns.size());
|
178
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
179
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
180
|
+
assertEquals("$.json1.a.default", keys[0]);
|
181
|
+
assertEquals("$.json1.a.default", values[0].getPath());
|
182
|
+
assertEquals("$.json1.a.copy", keys[1]);
|
183
|
+
assertEquals("$.json1.a.copy", values[1].getPath());
|
184
|
+
}
|
185
|
+
|
186
|
+
{
|
187
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$.json1.a.copy_array");
|
188
|
+
assertEquals(1, jsonColumns.size());
|
189
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
190
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
191
|
+
assertEquals("$.json1.a.copy_array[1]", keys[0]);
|
192
|
+
assertEquals("$.json1.a.copy_array[1]", values[0].getPath());
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
@Test
|
197
|
+
public void buildJsonSchema_Columns()
|
198
|
+
{
|
199
|
+
PluginTask task = taskFromYamlString(
|
200
|
+
"type: column",
|
201
|
+
"columns:",
|
202
|
+
" - {name: $.json1.a.default, type: string, default: foo}",
|
203
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
204
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
205
|
+
Schema inputSchema = schema(
|
206
|
+
new Column(0, "json1", JSON),
|
207
|
+
new Column(1, "json2", JSON));
|
208
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
209
|
+
|
210
|
+
assertFalse(subject.jsonColumns.containsKey("$.json1"));
|
211
|
+
assertTrue(subject.jsonColumns.containsKey("$.json1.a"));
|
212
|
+
assertTrue(subject.jsonColumns.containsKey("$.json1.a.copy_array"));
|
213
|
+
|
214
|
+
{
|
215
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$.json1.a");
|
216
|
+
assertEquals(2, jsonColumns.size());
|
217
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
218
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
219
|
+
assertEquals("$.json1.a.default", keys[0]);
|
220
|
+
assertEquals("$.json1.a.default", values[0].getPath());
|
221
|
+
assertEquals("$.json1.a.copy", keys[1]);
|
222
|
+
assertEquals("$.json1.a.copy", values[1].getPath());
|
223
|
+
}
|
224
|
+
|
225
|
+
{
|
226
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$.json1.a.copy_array");
|
227
|
+
assertEquals(1, jsonColumns.size());
|
228
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
229
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
230
|
+
assertEquals("$.json1.a.copy_array[1]", keys[0]);
|
231
|
+
assertEquals("$.json1.a.copy_array[1]", values[0].getPath());
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
@Test
|
236
|
+
public void buildJsonSchema_Mix() {
|
237
|
+
PluginTask task = taskFromYamlString(
|
238
|
+
"type: column",
|
239
|
+
"drop_columns:",
|
240
|
+
" - {name: $.json1.a.default}",
|
241
|
+
"add_columns:",
|
242
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
243
|
+
"columns:",
|
244
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
245
|
+
Schema inputSchema = schema(
|
246
|
+
new Column(0, "json1", JSON),
|
247
|
+
new Column(1, "json2", JSON));
|
248
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
249
|
+
|
250
|
+
assertFalse(subject.jsonDropColumns.isEmpty());
|
251
|
+
assertFalse(subject.jsonAddColumns.isEmpty());
|
252
|
+
assertTrue(subject.jsonColumns.isEmpty()); // drop_columns takes precedence over columns
|
253
|
+
}
|
254
|
+
|
255
|
+
@Test
|
256
|
+
public void visitMap_DropColumns() {
|
257
|
+
PluginTask task = taskFromYamlString(
|
258
|
+
"type: column",
|
259
|
+
"drop_columns:",
|
260
|
+
" - {name: $.json1.k1.k1}",
|
261
|
+
" - {name: $.json1.k2}");
|
262
|
+
Schema inputSchema = schema(
|
263
|
+
new Column(0, "json1", JSON),
|
264
|
+
new Column(1, "json2", JSON));
|
265
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
266
|
+
|
267
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
268
|
+
Value k1 = ValueFactory.newString("k1");
|
269
|
+
Value k2 = ValueFactory.newString("k2");
|
270
|
+
Value v = ValueFactory.newString("v");
|
271
|
+
Value map = ValueFactory.newMap(
|
272
|
+
k1, ValueFactory.newMap(k1, v),
|
273
|
+
k2, ValueFactory.newMap(k2, v));
|
274
|
+
|
275
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
276
|
+
assertEquals("{\"k1\":{}}", visited.toString());
|
277
|
+
}
|
278
|
+
|
279
|
+
@Test
|
280
|
+
public void visitMap_AddColumns() {
|
281
|
+
PluginTask task = taskFromYamlString(
|
282
|
+
"type: column",
|
283
|
+
"add_columns:",
|
284
|
+
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
285
|
+
" - {name: $.json1.k3.k3, type: string, default: v}",
|
286
|
+
" - {name: $.json1.k4, src: $.json1.k2}");
|
287
|
+
Schema inputSchema = schema(
|
288
|
+
new Column(0, "json1", JSON),
|
289
|
+
new Column(1, "json2", JSON));
|
290
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
291
|
+
|
292
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
293
|
+
Value k1 = ValueFactory.newString("k1");
|
294
|
+
Value k2 = ValueFactory.newString("k2");
|
295
|
+
Value v = ValueFactory.newString("v");
|
296
|
+
Value map = ValueFactory.newMap(
|
297
|
+
k1, ValueFactory.newMap(k1, v),
|
298
|
+
k2, ValueFactory.newMap(k2, v));
|
299
|
+
|
300
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
301
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
302
|
+
}
|
303
|
+
|
304
|
+
@Test
|
305
|
+
public void visitMap_Columns() {
|
306
|
+
PluginTask task = taskFromYamlString(
|
307
|
+
"type: column",
|
308
|
+
"columns:",
|
309
|
+
" - {name: $.json1.k1}",
|
310
|
+
" - {name: $.json1.k2.k2}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
|
311
|
+
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
312
|
+
" - {name: $.json1.k3.k3, type: string, default: v}",
|
313
|
+
" - {name: $.json1.k4, src: $.json1.k2}");
|
314
|
+
Schema inputSchema = schema(
|
315
|
+
new Column(0, "json1", JSON),
|
316
|
+
new Column(1, "json2", JSON));
|
317
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
318
|
+
|
319
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
320
|
+
Value k1 = ValueFactory.newString("k1");
|
321
|
+
Value k2 = ValueFactory.newString("k2");
|
322
|
+
Value v = ValueFactory.newString("v");
|
323
|
+
Value map = ValueFactory.newMap(
|
324
|
+
k1, ValueFactory.newMap(k1, v),
|
325
|
+
k2, ValueFactory.newMap(k2, v));
|
326
|
+
|
327
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
328
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
329
|
+
}
|
330
|
+
|
331
|
+
@Test
|
332
|
+
public void visitArray_DropColumns() {
|
333
|
+
PluginTask task = taskFromYamlString(
|
334
|
+
"type: column",
|
335
|
+
"drop_columns:",
|
336
|
+
" - {name: \"$.json1.k1[0].k1\"}",
|
337
|
+
" - {name: \"$.json1.k2[*]\"}");
|
338
|
+
Schema inputSchema = schema(
|
339
|
+
new Column(0, "json1", JSON),
|
340
|
+
new Column(1, "json2", JSON));
|
341
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
342
|
+
|
343
|
+
// {"k1":[{"k1":"v"}],"k2":["v","v"]}
|
344
|
+
Value k1 = ValueFactory.newString("k1");
|
345
|
+
Value k2 = ValueFactory.newString("k2");
|
346
|
+
Value v = ValueFactory.newString("v");
|
347
|
+
Value map = ValueFactory.newMap(
|
348
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)),
|
349
|
+
k2, ValueFactory.newArray(v, v));
|
350
|
+
|
351
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
352
|
+
assertEquals("{\"k1\":[{}],\"k2\":[]}", visited.toString());
|
353
|
+
}
|
354
|
+
|
355
|
+
@Test
|
356
|
+
public void visitArray_AddColumns() {
|
357
|
+
PluginTask task = taskFromYamlString(
|
358
|
+
"type: column",
|
359
|
+
"add_columns:",
|
360
|
+
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
361
|
+
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
362
|
+
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
363
|
+
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
364
|
+
Schema inputSchema = schema(
|
365
|
+
new Column(0, "json1", JSON),
|
366
|
+
new Column(1, "json2", JSON));
|
367
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
368
|
+
|
369
|
+
// {"k1":[{"k1":"v"}],"k2":["v","v"]}
|
370
|
+
Value k1 = ValueFactory.newString("k1");
|
371
|
+
Value k2 = ValueFactory.newString("k2");
|
372
|
+
Value v = ValueFactory.newString("v");
|
373
|
+
Value map = ValueFactory.newMap(
|
374
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)),
|
375
|
+
k2, ValueFactory.newArray(v, v));
|
376
|
+
|
377
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
378
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"},{\"k1\":\"v\"}],\"k2\":[\"v\",\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
379
|
+
}
|
380
|
+
|
381
|
+
@Test
|
382
|
+
public void visitArray_Columns() {
|
383
|
+
PluginTask task = taskFromYamlString(
|
384
|
+
"type: column",
|
385
|
+
"columns:",
|
386
|
+
" - {name: \"$.json1.k1\"}",
|
387
|
+
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
388
|
+
" - {name: \"$.json1.k2[0]\"}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
|
389
|
+
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
390
|
+
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
391
|
+
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
392
|
+
Schema inputSchema = schema(
|
393
|
+
new Column(0, "json1", JSON),
|
394
|
+
new Column(1, "json2", JSON));
|
395
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
396
|
+
|
397
|
+
// {"k1":[{"k1":"v"},"v"],"k2":["v","v"]}
|
398
|
+
Value k1 = ValueFactory.newString("k1");
|
399
|
+
Value k2 = ValueFactory.newString("k2");
|
400
|
+
Value v = ValueFactory.newString("v");
|
401
|
+
Value map = ValueFactory.newMap(
|
402
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v), v),
|
403
|
+
k2, ValueFactory.newArray(v, v));
|
404
|
+
|
405
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
406
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
407
|
+
}
|
408
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -67,8 +67,9 @@ files:
|
|
67
67
|
- src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
|
68
68
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
69
69
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
70
|
-
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
71
|
-
-
|
70
|
+
- src/test/java/org/embulk/filter/column/TestJsonColumn.java
|
71
|
+
- src/test/java/org/embulk/filter/column/TestJsonVisitor.java
|
72
|
+
- classpath/embulk-filter-column-0.5.1.jar
|
72
73
|
homepage: https://github.com/sonots/embulk-filter-column
|
73
74
|
licenses:
|
74
75
|
- MIT
|