embulk-filter-column 0.5.0.pre1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -1
- data/build.gradle +1 -1
- data/example/add_columns.yml +3 -0
- data/example/columns.yml +1 -7
- data/example/drop_columns.yml +1 -0
- data/example/example.yml +1 -7
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +86 -34
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +33 -23
- metadata +5 -8
- data/example/json_add_columns.yml +0 -31
- data/example/json_columns.yml +0 -23
- data/example/json_drop_columns.yml +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 485925060323de88d131e5d28c6646faa7c1d066
|
4
|
+
data.tar.gz: 44ac8c06b82a3d893bb3e8a9f0f1f33ebd847984
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5952f424c3b9f40cdb0929c63860d6c27faf16c611313ad13f159adf30372e94a7fd0e93b7674f401fb6345829cd357a6d1096f0fb56f391c0d07007385812ec
|
7
|
+
data.tar.gz: 8676e815498a52bb0a25b0ccb2918ee08de2a2c8506b1007da03e40bf5a1db5d85a826c21412d645d3455900e5259dbb148b98711d1c4f102457fc41c7527139
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -129,7 +129,8 @@ NOTE:
|
|
129
129
|
|
130
130
|
* JSONPath syntax is not fully supported
|
131
131
|
* Embulk's `type: json` cannot have a timestamp column, so `type: timestamp` for `add_columns` or `columns` with default is not available
|
132
|
-
* `src` for `add_columns` or `columns` is
|
132
|
+
* `src` (to rename or copy columns) for `add_columns` or `columns` is only partially supported so far
|
133
|
+
* the parent path of the JSONPath must be the same for `name` and `src`; for example, `{name: $.foo.copy, src: $.foo.bar}` works, but `{name: $.foo.copy, src: $.bar.baz}` does not work
|
133
134
|
|
134
135
|
## ToDo
|
135
136
|
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -23,5 +23,8 @@ filters:
|
|
23
23
|
- {name: d, type: timestamp, default: "2015-07-13"}
|
24
24
|
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
25
25
|
- {name: copy_score, src: score}
|
26
|
+
- {name: $.json.foo, type: long, default: 1}
|
27
|
+
- {name: $.json.d, type: string, default: "2015-07-13"}
|
28
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
26
29
|
out:
|
27
30
|
type: stdout
|
data/example/columns.yml
CHANGED
@@ -1,10 +1,3 @@
|
|
1
|
-
# in:
|
2
|
-
# type: random
|
3
|
-
# rows: 100
|
4
|
-
# schema:
|
5
|
-
# id: primary_key
|
6
|
-
# name: string
|
7
|
-
# score: integer
|
8
1
|
in:
|
9
2
|
type: file
|
10
3
|
path_prefix: example/example.csv
|
@@ -31,5 +24,6 @@ filters:
|
|
31
24
|
- {name: copy_score, src: score}
|
32
25
|
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
33
26
|
- {name: $.json.foo}
|
27
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
34
28
|
out:
|
35
29
|
type: stdout
|
data/example/drop_columns.yml
CHANGED
data/example/example.yml
CHANGED
@@ -1,10 +1,3 @@
|
|
1
|
-
# in:
|
2
|
-
# type: random
|
3
|
-
# rows: 100
|
4
|
-
# schema:
|
5
|
-
# id: primary_key
|
6
|
-
# name: string
|
7
|
-
# score: integer
|
8
1
|
in:
|
9
2
|
type: file
|
10
3
|
path_prefix: example/example.csv
|
@@ -31,5 +24,6 @@ filters:
|
|
31
24
|
- {name: copy_score, src: score}
|
32
25
|
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
33
26
|
- {name: $.json.foo}
|
27
|
+
- {name: $.json.copy_foo, src: $.json.foo}
|
34
28
|
out:
|
35
29
|
type: stdout
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
+
import org.embulk.config.ConfigException;
|
3
4
|
import org.embulk.spi.type.Type;
|
4
5
|
import org.msgpack.value.StringValue;
|
5
6
|
import org.msgpack.value.Value;
|
@@ -7,40 +8,60 @@ import org.msgpack.value.ValueFactory;
|
|
7
8
|
|
8
9
|
public class JsonColumn
|
9
10
|
{
|
10
|
-
private final String
|
11
|
+
private final String path;
|
11
12
|
private final Type type;
|
12
13
|
private final Value defaultValue;
|
13
|
-
private String
|
14
|
-
private String elementPath = null; // element path (like leaf) of json path
|
15
|
-
private StringValue nameValue = null;
|
16
|
-
private StringValue objectPathValue = null;
|
17
|
-
private StringValue elementPathValue = null;
|
14
|
+
private final String src;
|
18
15
|
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
private StringValue pathValue = null;
|
17
|
+
private String parentPath = null;
|
18
|
+
private String baseName = null;
|
19
|
+
private StringValue parentPathValue = null;
|
20
|
+
private StringValue baseNameValue = null;
|
21
|
+
|
22
|
+
private StringValue srcValue = null;
|
23
|
+
private String srcParentPath = null;
|
24
|
+
private String srcBaseName = null;
|
25
|
+
private StringValue srcParentPathValue = null;
|
26
|
+
private StringValue srcBaseNameValue = null;
|
27
|
+
|
28
|
+
public JsonColumn(String path, Type type)
|
22
29
|
{
|
23
|
-
this(
|
30
|
+
this(path, type, null, null);
|
24
31
|
}
|
25
32
|
|
26
|
-
public JsonColumn(
|
27
|
-
String name,
|
28
|
-
Type type,
|
29
|
-
Value defaultValue)
|
33
|
+
public JsonColumn(String path, Type type, Value defaultValue)
|
30
34
|
{
|
31
|
-
this
|
35
|
+
this(path, type, defaultValue, null);
|
36
|
+
}
|
37
|
+
|
38
|
+
public JsonColumn(String path, Type type, Value defaultValue, String src)
|
39
|
+
{
|
40
|
+
this.path = path;
|
32
41
|
this.type = type;
|
33
42
|
this.defaultValue = (defaultValue == null ? ValueFactory.newNil() : defaultValue);
|
34
|
-
this.
|
35
|
-
|
36
|
-
this.
|
37
|
-
this.
|
38
|
-
this.
|
43
|
+
this.src = (src == null ? path : src);
|
44
|
+
|
45
|
+
this.pathValue = ValueFactory.newString(path);
|
46
|
+
this.parentPath = parentPath(path);
|
47
|
+
this.baseName = baseName(path);
|
48
|
+
this.parentPathValue = ValueFactory.newString(parentPath);
|
49
|
+
this.baseNameValue = ValueFactory.newString(baseName);
|
50
|
+
|
51
|
+
this.srcValue = ValueFactory.newString(this.src);
|
52
|
+
this.srcParentPath = parentPath(this.src);
|
53
|
+
this.srcBaseName = baseName(this.src);
|
54
|
+
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
55
|
+
this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
|
56
|
+
|
57
|
+
if (! srcParentPath.equals(parentPath)) {
|
58
|
+
throw new ConfigException(String.format("The branch (parent path) of src \"%s\" must be same with of name \"%s\" yet", src, path));
|
59
|
+
}
|
39
60
|
}
|
40
61
|
|
41
|
-
public String
|
62
|
+
public String getPath()
|
42
63
|
{
|
43
|
-
return
|
64
|
+
return path;
|
44
65
|
}
|
45
66
|
|
46
67
|
public Type getType()
|
@@ -53,32 +74,63 @@ public class JsonColumn
|
|
53
74
|
return defaultValue;
|
54
75
|
}
|
55
76
|
|
56
|
-
public String
|
77
|
+
public String getSrc()
|
78
|
+
{
|
79
|
+
return src;
|
80
|
+
}
|
81
|
+
|
82
|
+
public StringValue getPathValue()
|
83
|
+
{
|
84
|
+
return pathValue;
|
85
|
+
}
|
86
|
+
|
87
|
+
public String getParentPath()
|
88
|
+
{
|
89
|
+
return parentPath;
|
90
|
+
}
|
91
|
+
|
92
|
+
public String getBaseName()
|
93
|
+
{
|
94
|
+
return baseName;
|
95
|
+
}
|
96
|
+
|
97
|
+
public StringValue getParentPathValue()
|
98
|
+
{
|
99
|
+
return parentPathValue;
|
100
|
+
}
|
101
|
+
|
102
|
+
public StringValue getBaseNameValue()
|
103
|
+
{
|
104
|
+
return baseNameValue;
|
105
|
+
}
|
106
|
+
|
107
|
+
public StringValue getSrcValue()
|
57
108
|
{
|
58
|
-
return
|
109
|
+
return srcValue;
|
59
110
|
}
|
60
111
|
|
61
|
-
public String
|
112
|
+
public String getSrcParentPath()
|
62
113
|
{
|
63
|
-
return
|
114
|
+
return srcParentPath;
|
64
115
|
}
|
65
116
|
|
66
|
-
public
|
117
|
+
public String getSrcBaseName()
|
67
118
|
{
|
68
|
-
return
|
119
|
+
return srcBaseName;
|
69
120
|
}
|
70
121
|
|
71
|
-
public StringValue
|
122
|
+
public StringValue getSrcParentPathValue()
|
72
123
|
{
|
73
|
-
return
|
124
|
+
return srcParentPathValue;
|
74
125
|
}
|
75
126
|
|
76
|
-
public StringValue
|
127
|
+
public StringValue getSrcBaseNameValue()
|
77
128
|
{
|
78
|
-
return
|
129
|
+
return srcBaseNameValue;
|
79
130
|
}
|
80
131
|
|
81
|
-
|
132
|
+
// like File.dirname
|
133
|
+
public static String parentPath(String path)
|
82
134
|
{
|
83
135
|
String[] parts = path.split("\\.");
|
84
136
|
StringBuilder builder = new StringBuilder();
|
@@ -96,7 +148,7 @@ public class JsonColumn
|
|
96
148
|
return builder.toString();
|
97
149
|
}
|
98
150
|
|
99
|
-
public static String
|
151
|
+
public static String baseName(String path)
|
100
152
|
{
|
101
153
|
String[] parts = path.split("\\.");
|
102
154
|
return parts[parts.length - 1];
|
@@ -80,29 +80,29 @@ public class JsonVisitor
|
|
80
80
|
|
81
81
|
private void jsonColumnsPut(String path, JsonColumn value)
|
82
82
|
{
|
83
|
-
String
|
84
|
-
if (! jsonColumns.containsKey(
|
85
|
-
jsonColumns.put(
|
83
|
+
String parentPath = JsonColumn.parentPath(path);
|
84
|
+
if (! jsonColumns.containsKey(parentPath)) {
|
85
|
+
jsonColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
|
86
86
|
}
|
87
|
-
jsonColumns.get(
|
87
|
+
jsonColumns.get(parentPath).put(path, value);
|
88
88
|
}
|
89
89
|
|
90
90
|
private void jsonAddColumnsPut(String path, JsonColumn value)
|
91
91
|
{
|
92
|
-
String
|
93
|
-
if (! jsonAddColumns.containsKey(
|
94
|
-
jsonAddColumns.put(
|
92
|
+
String parentPath = JsonColumn.parentPath(path);
|
93
|
+
if (! jsonAddColumns.containsKey(parentPath)) {
|
94
|
+
jsonAddColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
|
95
95
|
}
|
96
|
-
jsonAddColumns.get(
|
96
|
+
jsonAddColumns.get(parentPath).put(path, value);
|
97
97
|
}
|
98
98
|
|
99
99
|
private void jsonDropColumnsPut(String path)
|
100
100
|
{
|
101
|
-
String
|
102
|
-
if (! jsonDropColumns.containsKey(
|
103
|
-
jsonDropColumns.put(
|
101
|
+
String parentPath = JsonColumn.parentPath(path);
|
102
|
+
if (! jsonDropColumns.containsKey(parentPath)) {
|
103
|
+
jsonDropColumns.put(parentPath, new HashSet<String>());
|
104
104
|
}
|
105
|
-
jsonDropColumns.get(
|
105
|
+
jsonDropColumns.get(parentPath).add(path);
|
106
106
|
}
|
107
107
|
|
108
108
|
// build jsonColumns, jsonAddColumns, and jsonDropColumns
|
@@ -131,7 +131,8 @@ public class JsonVisitor
|
|
131
131
|
continue;
|
132
132
|
}
|
133
133
|
if (column.getSrc().isPresent()) {
|
134
|
-
|
134
|
+
String src = column.getSrc().get();
|
135
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
135
136
|
}
|
136
137
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
137
138
|
Type type = column.getType().get();
|
@@ -154,7 +155,8 @@ public class JsonVisitor
|
|
154
155
|
continue;
|
155
156
|
}
|
156
157
|
if (column.getSrc().isPresent()) {
|
157
|
-
|
158
|
+
String src = column.getSrc().get();
|
159
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
158
160
|
}
|
159
161
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
160
162
|
Type type = column.getType().get();
|
@@ -162,7 +164,7 @@ public class JsonVisitor
|
|
162
164
|
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
163
165
|
}
|
164
166
|
else {
|
165
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
167
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
166
168
|
}
|
167
169
|
}
|
168
170
|
}
|
@@ -279,15 +281,16 @@ public class JsonVisitor
|
|
279
281
|
}
|
280
282
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
281
283
|
Map<Value, Value> map = mapValue.map();
|
282
|
-
|
283
|
-
|
284
|
-
Value
|
285
|
-
|
284
|
+
LinkedHashMap<String, JsonColumn> jsonColumns = this.jsonColumns.get(rootPath);
|
285
|
+
for (JsonColumn jsonColumn : jsonColumns.values()) {
|
286
|
+
Value src = jsonColumn.getSrcBaseNameValue();
|
287
|
+
Value v = map.get(src);
|
288
|
+
String newPath = jsonColumn.getPath();
|
286
289
|
Value visited = visit(newPath, v);
|
287
290
|
if (visited == null) {
|
288
291
|
visited = jsonColumn.getDefaultValue();
|
289
292
|
}
|
290
|
-
newValue.add(i++,
|
293
|
+
newValue.add(i++, jsonColumn.getPathValue());
|
291
294
|
newValue.add(i++, visited);
|
292
295
|
}
|
293
296
|
}
|
@@ -302,9 +305,16 @@ public class JsonVisitor
|
|
302
305
|
}
|
303
306
|
}
|
304
307
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
+
Map<Value, Value> map = mapValue.map();
|
309
|
+
LinkedHashMap<String, JsonColumn> jsonAddColumns = this.jsonAddColumns.get(rootPath);
|
310
|
+
for (JsonColumn jsonColumn : jsonAddColumns.values()) {
|
311
|
+
Value src = jsonColumn.getSrcBaseNameValue();
|
312
|
+
Value v = map.get(src);
|
313
|
+
if (v == null) {
|
314
|
+
v = jsonColumn.getDefaultValue();
|
315
|
+
}
|
316
|
+
newValue.add(i++, jsonColumn.getPathValue());
|
317
|
+
newValue.add(i++, v);
|
308
318
|
}
|
309
319
|
}
|
310
320
|
return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -57,9 +57,6 @@ files:
|
|
57
57
|
- example/drop_columns.yml
|
58
58
|
- example/example.csv
|
59
59
|
- example/example.yml
|
60
|
-
- example/json_add_columns.yml
|
61
|
-
- example/json_columns.yml
|
62
|
-
- example/json_drop_columns.yml
|
63
60
|
- gradle/wrapper/gradle-wrapper.jar
|
64
61
|
- gradle/wrapper/gradle-wrapper.properties
|
65
62
|
- gradlew
|
@@ -71,7 +68,7 @@ files:
|
|
71
68
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
72
69
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
73
70
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
74
|
-
- classpath/embulk-filter-column-0.5.0.
|
71
|
+
- classpath/embulk-filter-column-0.5.0.jar
|
75
72
|
homepage: https://github.com/sonots/embulk-filter-column
|
76
73
|
licenses:
|
77
74
|
- MIT
|
@@ -87,9 +84,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
84
|
version: '0'
|
88
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
86
|
requirements:
|
90
|
-
- - '
|
87
|
+
- - '>='
|
91
88
|
- !ruby/object:Gem::Version
|
92
|
-
version:
|
89
|
+
version: '0'
|
93
90
|
requirements: []
|
94
91
|
rubyforge_project:
|
95
92
|
rubygems_version: 2.1.9
|
@@ -1,31 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
default_timezone: "Asia/Tokyo"
|
20
|
-
default_timestamp_format: "%Y-%m-%d"
|
21
|
-
columns:
|
22
|
-
- {name: time}
|
23
|
-
- {name: id}
|
24
|
-
- {name: name}
|
25
|
-
- {name: score}
|
26
|
-
- {name: json, default: "{}"}
|
27
|
-
add_columns:
|
28
|
-
- {name: $.json.foo, type: long, default: 1}
|
29
|
-
- {name: $.json.d, type: string, default: "2015-07-13"}
|
30
|
-
out:
|
31
|
-
type: stdout
|
data/example/json_columns.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
columns:
|
20
|
-
- {name: json, default: "{\"foo\":\"FOO\"}"}
|
21
|
-
- {name: $.json.foo}
|
22
|
-
out:
|
23
|
-
type: stdout
|
@@ -1,22 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
-
- {name: id, type: long}
|
14
|
-
- {name: name, type: string}
|
15
|
-
- {name: score, type: double}
|
16
|
-
- {name: json, type: json}
|
17
|
-
filters:
|
18
|
-
- type: column
|
19
|
-
drop_columns:
|
20
|
-
- {name: $.json.foo }
|
21
|
-
out:
|
22
|
-
type: stdout
|