embulk-filter-column 0.5.0.pre1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -1
- data/build.gradle +1 -1
- data/example/add_columns.yml +3 -0
- data/example/columns.yml +1 -7
- data/example/drop_columns.yml +1 -0
- data/example/example.yml +1 -7
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +86 -34
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +33 -23
- metadata +5 -8
- data/example/json_add_columns.yml +0 -31
- data/example/json_columns.yml +0 -23
- data/example/json_drop_columns.yml +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 485925060323de88d131e5d28c6646faa7c1d066
|
4
|
+
data.tar.gz: 44ac8c06b82a3d893bb3e8a9f0f1f33ebd847984
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5952f424c3b9f40cdb0929c63860d6c27faf16c611313ad13f159adf30372e94a7fd0e93b7674f401fb6345829cd357a6d1096f0fb56f391c0d07007385812ec
|
7
|
+
data.tar.gz: 8676e815498a52bb0a25b0ccb2918ee08de2a2c8506b1007da03e40bf5a1db5d85a826c21412d645d3455900e5259dbb148b98711d1c4f102457fc41c7527139
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -129,7 +129,8 @@ NOTE:
|
|
129
129
|
|
130
130
|
* JSONPath syntax is not fully supported
|
131
131
|
* Embulk's type: json cannot have a timestamp column, so `type: timestamp` for `add_columns` or `columns` with default is not available
|
132
|
-
* `src` for `add_columns` or `columns` is
|
132
|
+
* `src` (to rename or copy columns) for `add_columns` or `columns` is only partially supported so far
|
133
|
+
* the parent path of `name` and `src` must be the same; for example, `{name: $.foo.copy, src: $.foo.bar}` works, but `{name: $.foo.copy, src: $.bar.baz}` does not work
|
133
134
|
|
134
135
|
## ToDo
|
135
136
|
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -23,5 +23,8 @@ filters:
|
|
23
23
|
- {name: d, type: timestamp, default: "2015-07-13"}
|
24
24
|
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
25
25
|
- {name: copy_score, src: score}
|
26
|
+
- {name: $.json.foo, type: long, default: 1}
|
27
|
+
- {name: $.json.d, type: string, default: "2015-07-13"}
|
28
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
26
29
|
out:
|
27
30
|
type: stdout
|
data/example/columns.yml
CHANGED
@@ -1,10 +1,3 @@
|
|
1
|
-
# in:
|
2
|
-
# type: random
|
3
|
-
# rows: 100
|
4
|
-
# schema:
|
5
|
-
# id: primary_key
|
6
|
-
# name: string
|
7
|
-
# score: integer
|
8
1
|
in:
|
9
2
|
type: file
|
10
3
|
path_prefix: example/example.csv
|
@@ -31,5 +24,6 @@ filters:
|
|
31
24
|
- {name: copy_score, src: score}
|
32
25
|
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
33
26
|
- {name: $.json.foo}
|
27
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
34
28
|
out:
|
35
29
|
type: stdout
|
data/example/drop_columns.yml
CHANGED
data/example/example.yml
CHANGED
@@ -1,10 +1,3 @@
|
|
1
|
-
# in:
|
2
|
-
# type: random
|
3
|
-
# rows: 100
|
4
|
-
# schema:
|
5
|
-
# id: primary_key
|
6
|
-
# name: string
|
7
|
-
# score: integer
|
8
1
|
in:
|
9
2
|
type: file
|
10
3
|
path_prefix: example/example.csv
|
@@ -31,5 +24,6 @@ filters:
|
|
31
24
|
- {name: copy_score, src: score}
|
32
25
|
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
33
26
|
- {name: $.json.foo}
|
27
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
34
28
|
out:
|
35
29
|
type: stdout
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
+
import org.embulk.config.ConfigException;
|
3
4
|
import org.embulk.spi.type.Type;
|
4
5
|
import org.msgpack.value.StringValue;
|
5
6
|
import org.msgpack.value.Value;
|
@@ -7,40 +8,60 @@ import org.msgpack.value.ValueFactory;
|
|
7
8
|
|
8
9
|
public class JsonColumn
|
9
10
|
{
|
10
|
-
private final String
|
11
|
+
private final String path;
|
11
12
|
private final Type type;
|
12
13
|
private final Value defaultValue;
|
13
|
-
private String
|
14
|
-
private String elementPath = null; // element path (like leaf) of json path
|
15
|
-
private StringValue nameValue = null;
|
16
|
-
private StringValue objectPathValue = null;
|
17
|
-
private StringValue elementPathValue = null;
|
14
|
+
private final String src;
|
18
15
|
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
private StringValue pathValue = null;
|
17
|
+
private String parentPath = null;
|
18
|
+
private String baseName = null;
|
19
|
+
private StringValue parentPathValue = null;
|
20
|
+
private StringValue baseNameValue = null;
|
21
|
+
|
22
|
+
private StringValue srcValue = null;
|
23
|
+
private String srcParentPath = null;
|
24
|
+
private String srcBaseName = null;
|
25
|
+
private StringValue srcParentPathValue = null;
|
26
|
+
private StringValue srcBaseNameValue = null;
|
27
|
+
|
28
|
+
public JsonColumn(String path, Type type)
|
22
29
|
{
|
23
|
-
this(
|
30
|
+
this(path, type, null, null);
|
24
31
|
}
|
25
32
|
|
26
|
-
public JsonColumn(
|
27
|
-
String name,
|
28
|
-
Type type,
|
29
|
-
Value defaultValue)
|
33
|
+
public JsonColumn(String path, Type type, Value defaultValue)
|
30
34
|
{
|
31
|
-
this
|
35
|
+
this(path, type, defaultValue, null);
|
36
|
+
}
|
37
|
+
|
38
|
+
public JsonColumn(String path, Type type, Value defaultValue, String src)
|
39
|
+
{
|
40
|
+
this.path = path;
|
32
41
|
this.type = type;
|
33
42
|
this.defaultValue = (defaultValue == null ? ValueFactory.newNil() : defaultValue);
|
34
|
-
this.
|
35
|
-
|
36
|
-
this.
|
37
|
-
this.
|
38
|
-
this.
|
43
|
+
this.src = (src == null ? path : src);
|
44
|
+
|
45
|
+
this.pathValue = ValueFactory.newString(path);
|
46
|
+
this.parentPath = parentPath(path);
|
47
|
+
this.baseName = baseName(path);
|
48
|
+
this.parentPathValue = ValueFactory.newString(parentPath);
|
49
|
+
this.baseNameValue = ValueFactory.newString(baseName);
|
50
|
+
|
51
|
+
this.srcValue = ValueFactory.newString(this.src);
|
52
|
+
this.srcParentPath = parentPath(this.src);
|
53
|
+
this.srcBaseName = baseName(this.src);
|
54
|
+
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
55
|
+
this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
|
56
|
+
|
57
|
+
if (! srcParentPath.equals(parentPath)) {
|
58
|
+
throw new ConfigException(String.format("The branch (parent path) of src \"%s\" must be same with of name \"%s\" yet", src, path));
|
59
|
+
}
|
39
60
|
}
|
40
61
|
|
41
|
-
public String
|
62
|
+
public String getPath()
|
42
63
|
{
|
43
|
-
return
|
64
|
+
return path;
|
44
65
|
}
|
45
66
|
|
46
67
|
public Type getType()
|
@@ -53,32 +74,63 @@ public class JsonColumn
|
|
53
74
|
return defaultValue;
|
54
75
|
}
|
55
76
|
|
56
|
-
public String
|
77
|
+
public String getSrc()
|
78
|
+
{
|
79
|
+
return src;
|
80
|
+
}
|
81
|
+
|
82
|
+
public StringValue getPathValue()
|
83
|
+
{
|
84
|
+
return pathValue;
|
85
|
+
}
|
86
|
+
|
87
|
+
public String getParentPath()
|
88
|
+
{
|
89
|
+
return parentPath;
|
90
|
+
}
|
91
|
+
|
92
|
+
public String getBaseName()
|
93
|
+
{
|
94
|
+
return baseName;
|
95
|
+
}
|
96
|
+
|
97
|
+
public StringValue getParentPathValue()
|
98
|
+
{
|
99
|
+
return parentPathValue;
|
100
|
+
}
|
101
|
+
|
102
|
+
public StringValue getBaseNameValue()
|
103
|
+
{
|
104
|
+
return baseNameValue;
|
105
|
+
}
|
106
|
+
|
107
|
+
public StringValue getSrcValue()
|
57
108
|
{
|
58
|
-
return
|
109
|
+
return srcValue;
|
59
110
|
}
|
60
111
|
|
61
|
-
public String
|
112
|
+
public String getSrcParentPath()
|
62
113
|
{
|
63
|
-
return
|
114
|
+
return srcParentPath;
|
64
115
|
}
|
65
116
|
|
66
|
-
public
|
117
|
+
public String getSrcBaseName()
|
67
118
|
{
|
68
|
-
return
|
119
|
+
return srcBaseName;
|
69
120
|
}
|
70
121
|
|
71
|
-
public StringValue
|
122
|
+
public StringValue getSrcParentPathValue()
|
72
123
|
{
|
73
|
-
return
|
124
|
+
return srcParentPathValue;
|
74
125
|
}
|
75
126
|
|
76
|
-
public StringValue
|
127
|
+
public StringValue getSrcBaseNameValue()
|
77
128
|
{
|
78
|
-
return
|
129
|
+
return srcBaseNameValue;
|
79
130
|
}
|
80
131
|
|
81
|
-
|
132
|
+
// like File.dirname
|
133
|
+
public static String parentPath(String path)
|
82
134
|
{
|
83
135
|
String[] parts = path.split("\\.");
|
84
136
|
StringBuilder builder = new StringBuilder();
|
@@ -96,7 +148,7 @@ public class JsonColumn
|
|
96
148
|
return builder.toString();
|
97
149
|
}
|
98
150
|
|
99
|
-
public static String
|
151
|
+
public static String baseName(String path)
|
100
152
|
{
|
101
153
|
String[] parts = path.split("\\.");
|
102
154
|
return parts[parts.length - 1];
|
@@ -80,29 +80,29 @@ public class JsonVisitor
|
|
80
80
|
|
81
81
|
private void jsonColumnsPut(String path, JsonColumn value)
|
82
82
|
{
|
83
|
-
String
|
84
|
-
if (! jsonColumns.containsKey(
|
85
|
-
jsonColumns.put(
|
83
|
+
String parentPath = JsonColumn.parentPath(path);
|
84
|
+
if (! jsonColumns.containsKey(parentPath)) {
|
85
|
+
jsonColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
|
86
86
|
}
|
87
|
-
jsonColumns.get(
|
87
|
+
jsonColumns.get(parentPath).put(path, value);
|
88
88
|
}
|
89
89
|
|
90
90
|
private void jsonAddColumnsPut(String path, JsonColumn value)
|
91
91
|
{
|
92
|
-
String
|
93
|
-
if (! jsonAddColumns.containsKey(
|
94
|
-
jsonAddColumns.put(
|
92
|
+
String parentPath = JsonColumn.parentPath(path);
|
93
|
+
if (! jsonAddColumns.containsKey(parentPath)) {
|
94
|
+
jsonAddColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
|
95
95
|
}
|
96
|
-
jsonAddColumns.get(
|
96
|
+
jsonAddColumns.get(parentPath).put(path, value);
|
97
97
|
}
|
98
98
|
|
99
99
|
private void jsonDropColumnsPut(String path)
|
100
100
|
{
|
101
|
-
String
|
102
|
-
if (! jsonDropColumns.containsKey(
|
103
|
-
jsonDropColumns.put(
|
101
|
+
String parentPath = JsonColumn.parentPath(path);
|
102
|
+
if (! jsonDropColumns.containsKey(parentPath)) {
|
103
|
+
jsonDropColumns.put(parentPath, new HashSet<String>());
|
104
104
|
}
|
105
|
-
jsonDropColumns.get(
|
105
|
+
jsonDropColumns.get(parentPath).add(path);
|
106
106
|
}
|
107
107
|
|
108
108
|
// build jsonColumns, jsonAddColumns, and jsonDropColumns
|
@@ -131,7 +131,8 @@ public class JsonVisitor
|
|
131
131
|
continue;
|
132
132
|
}
|
133
133
|
if (column.getSrc().isPresent()) {
|
134
|
-
|
134
|
+
String src = column.getSrc().get();
|
135
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
135
136
|
}
|
136
137
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
137
138
|
Type type = column.getType().get();
|
@@ -154,7 +155,8 @@ public class JsonVisitor
|
|
154
155
|
continue;
|
155
156
|
}
|
156
157
|
if (column.getSrc().isPresent()) {
|
157
|
-
|
158
|
+
String src = column.getSrc().get();
|
159
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
158
160
|
}
|
159
161
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
160
162
|
Type type = column.getType().get();
|
@@ -162,7 +164,7 @@ public class JsonVisitor
|
|
162
164
|
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
163
165
|
}
|
164
166
|
else {
|
165
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
167
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
166
168
|
}
|
167
169
|
}
|
168
170
|
}
|
@@ -279,15 +281,16 @@ public class JsonVisitor
|
|
279
281
|
}
|
280
282
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
281
283
|
Map<Value, Value> map = mapValue.map();
|
282
|
-
|
283
|
-
|
284
|
-
Value
|
285
|
-
|
284
|
+
LinkedHashMap<String, JsonColumn> jsonColumns = this.jsonColumns.get(rootPath);
|
285
|
+
for (JsonColumn jsonColumn : jsonColumns.values()) {
|
286
|
+
Value src = jsonColumn.getSrcBaseNameValue();
|
287
|
+
Value v = map.get(src);
|
288
|
+
String newPath = jsonColumn.getPath();
|
286
289
|
Value visited = visit(newPath, v);
|
287
290
|
if (visited == null) {
|
288
291
|
visited = jsonColumn.getDefaultValue();
|
289
292
|
}
|
290
|
-
newValue.add(i++,
|
293
|
+
newValue.add(i++, jsonColumn.getPathValue());
|
291
294
|
newValue.add(i++, visited);
|
292
295
|
}
|
293
296
|
}
|
@@ -302,9 +305,16 @@ public class JsonVisitor
|
|
302
305
|
}
|
303
306
|
}
|
304
307
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
+
Map<Value, Value> map = mapValue.map();
|
309
|
+
LinkedHashMap<String, JsonColumn> jsonAddColumns = this.jsonAddColumns.get(rootPath);
|
310
|
+
for (JsonColumn jsonColumn : jsonAddColumns.values()) {
|
311
|
+
Value src = jsonColumn.getSrcBaseNameValue();
|
312
|
+
Value v = map.get(src);
|
313
|
+
if (v == null) {
|
314
|
+
v = jsonColumn.getDefaultValue();
|
315
|
+
}
|
316
|
+
newValue.add(i++, jsonColumn.getPathValue());
|
317
|
+
newValue.add(i++, v);
|
308
318
|
}
|
309
319
|
}
|
310
320
|
return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -57,9 +57,6 @@ files:
|
|
57
57
|
- example/drop_columns.yml
|
58
58
|
- example/example.csv
|
59
59
|
- example/example.yml
|
60
|
-
- example/json_add_columns.yml
|
61
|
-
- example/json_columns.yml
|
62
|
-
- example/json_drop_columns.yml
|
63
60
|
- gradle/wrapper/gradle-wrapper.jar
|
64
61
|
- gradle/wrapper/gradle-wrapper.properties
|
65
62
|
- gradlew
|
@@ -71,7 +68,7 @@ files:
|
|
71
68
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
72
69
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
73
70
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
74
|
-
- classpath/embulk-filter-column-0.5.0.
|
71
|
+
- classpath/embulk-filter-column-0.5.0.jar
|
75
72
|
homepage: https://github.com/sonots/embulk-filter-column
|
76
73
|
licenses:
|
77
74
|
- MIT
|
@@ -87,9 +84,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
84
|
version: '0'
|
88
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
86
|
requirements:
|
90
|
-
- - '
|
87
|
+
- - '>='
|
91
88
|
- !ruby/object:Gem::Version
|
92
|
-
version:
|
89
|
+
version: '0'
|
93
90
|
requirements: []
|
94
91
|
rubyforge_project:
|
95
92
|
rubygems_version: 2.1.9
|
@@ -1,31 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
default_timezone: "Asia/Tokyo"
|
20
|
-
default_timestamp_format: "%Y-%m-%d"
|
21
|
-
columns:
|
22
|
-
- {name: time}
|
23
|
-
- {name: id}
|
24
|
-
- {name: name}
|
25
|
-
- {name: score}
|
26
|
-
- {name: json, default: "{}"}
|
27
|
-
add_columns:
|
28
|
-
- {name: $.json.foo, type: long, default: 1}
|
29
|
-
- {name: $.json.d, type: string, default: "2015-07-13"}
|
30
|
-
out:
|
31
|
-
type: stdout
|
data/example/json_columns.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
columns:
|
20
|
-
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
21
|
-
- {name: $.json.foo}
|
22
|
-
out:
|
23
|
-
type: stdout
|
@@ -1,22 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
drop_columns:
|
20
|
-
- {name: $.json.foo }
|
21
|
-
out:
|
22
|
-
type: stdout
|