embulk-filter-column 0.5.0.pre1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1e88bb8b22f8f2030982764303175dd77b97a42a
4
- data.tar.gz: e7b65b4cc79b92b3aa89596fc3e1253fa7897ace
3
+ metadata.gz: 485925060323de88d131e5d28c6646faa7c1d066
4
+ data.tar.gz: 44ac8c06b82a3d893bb3e8a9f0f1f33ebd847984
5
5
  SHA512:
6
- metadata.gz: 7cc74b699dc85ec17ff45ab2d991f77219b7fa93ef03d2d444b21d36b83c47689b4d08ebe81dfc0e428134f9a380262367fd8d3cd33987b831d8628e64a79713
7
- data.tar.gz: e2cb3d98ec5b678f67d94c8b70854f2b46c5db7544cfbe1f2713653229195c1c57ee2b28c8fe423ab345a01f16b4b7d68337ebfd4c93719512e68c7c35e7a1e2
6
+ metadata.gz: 5952f424c3b9f40cdb0929c63860d6c27faf16c611313ad13f159adf30372e94a7fd0e93b7674f401fb6345829cd357a6d1096f0fb56f391c0d07007385812ec
7
+ data.tar.gz: 8676e815498a52bb0a25b0ccb2918ee08de2a2c8506b1007da03e40bf5a1db5d85a826c21412d645d3455900e5259dbb148b98711d1c4f102457fc41c7527139
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.5.0 (2016-05-31)
2
+
3
+ Enhancements:
4
+
5
+ * Support src (rename or copy columns) for JSONPath (but only partially)
6
+
1
7
  # 0.5.0.pre1 (2016-05-24)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -129,7 +129,8 @@ NOTE:
129
129
 
130
130
  * JSONPath syntax is not fully supported
131
131
  * Embulk's type: json cannot have timestamp column, so `type: timestamp` for `add_columns` or `columns` with default is not available
132
- * `src` for `add_columns` or `columns` is not supported yet
132
+ * `src` (to rename or copy columns) for `add_columns` or `columns` is only partially supported so far
133
+ * the parent path (directory) of the JSONPath must be the same for `name` and `src`; for example, `{name: $.foo.copy, src: $.foo.bar}` works, but `{name: $.foo.copy, src: $.bar.baz}` does not work
133
134
 
134
135
  ## ToDo
135
136
 
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.5.0.pre1"
16
+ version = "0.5.0"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -23,5 +23,8 @@ filters:
23
23
  - {name: d, type: timestamp, default: "2015-07-13"}
24
24
  - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
25
25
  - {name: copy_score, src: score}
26
+ - {name: $.json.foo, type: long, default: 1}
27
+ - {name: $.json.d, type: string, default: "2015-07-13"}
28
+ - {name: $.json.copy_foo, src: $.json.foo}
26
29
  out:
27
30
  type: stdout
data/example/columns.yml CHANGED
@@ -1,10 +1,3 @@
1
- # in:
2
- # type: random
3
- # rows: 100
4
- # schema:
5
- # id: primary_key
6
- # name: string
7
- # score: integer
8
1
  in:
9
2
  type: file
10
3
  path_prefix: example/example.csv
@@ -31,5 +24,6 @@ filters:
31
24
  - {name: copy_score, src: score}
32
25
  - {name: json, default: "{\"foo\":\"FOO\"}"}
33
26
  - {name: $.json.foo}
27
+ - {name: $.json.copy_foo, src: $.json.foo}
34
28
  out:
35
29
  type: stdout
@@ -19,5 +19,6 @@ filters:
19
19
  drop_columns:
20
20
  - {name: time }
21
21
  - {name: id }
22
+ - {name: $.json.foo }
22
23
  out:
23
24
  type: stdout
data/example/example.yml CHANGED
@@ -1,10 +1,3 @@
1
- # in:
2
- # type: random
3
- # rows: 100
4
- # schema:
5
- # id: primary_key
6
- # name: string
7
- # score: integer
8
1
  in:
9
2
  type: file
10
3
  path_prefix: example/example.csv
@@ -31,5 +24,6 @@ filters:
31
24
  - {name: copy_score, src: score}
32
25
  - {name: json, default: "{\"foo\":\"FOO\"}"}
33
26
  - {name: $.json.foo}
27
+ - {name: $.json.copy_foo, src: $.json.foo}
34
28
  out:
35
29
  type: stdout
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
+ import org.embulk.config.ConfigException;
3
4
  import org.embulk.spi.type.Type;
4
5
  import org.msgpack.value.StringValue;
5
6
  import org.msgpack.value.Value;
@@ -7,40 +8,60 @@ import org.msgpack.value.ValueFactory;
7
8
 
8
9
  public class JsonColumn
9
10
  {
10
- private final String name;
11
+ private final String path;
11
12
  private final Type type;
12
13
  private final Value defaultValue;
13
- private String objectPath = null; // object path (like directory) of json path
14
- private String elementPath = null; // element path (like leaf) of json path
15
- private StringValue nameValue = null;
16
- private StringValue objectPathValue = null;
17
- private StringValue elementPathValue = null;
14
+ private final String src;
18
15
 
19
- public JsonColumn(
20
- String name,
21
- Type type)
16
+ private StringValue pathValue = null;
17
+ private String parentPath = null;
18
+ private String baseName = null;
19
+ private StringValue parentPathValue = null;
20
+ private StringValue baseNameValue = null;
21
+
22
+ private StringValue srcValue = null;
23
+ private String srcParentPath = null;
24
+ private String srcBaseName = null;
25
+ private StringValue srcParentPathValue = null;
26
+ private StringValue srcBaseNameValue = null;
27
+
28
+ public JsonColumn(String path, Type type)
22
29
  {
23
- this(name, type, null);
30
+ this(path, type, null, null);
24
31
  }
25
32
 
26
- public JsonColumn(
27
- String name,
28
- Type type,
29
- Value defaultValue)
33
+ public JsonColumn(String path, Type type, Value defaultValue)
30
34
  {
31
- this.name = name;
35
+ this(path, type, defaultValue, null);
36
+ }
37
+
38
+ public JsonColumn(String path, Type type, Value defaultValue, String src)
39
+ {
40
+ this.path = path;
32
41
  this.type = type;
33
42
  this.defaultValue = (defaultValue == null ? ValueFactory.newNil() : defaultValue);
34
- this.objectPath = objectPath(name);
35
- this.elementPath = elementPath(name);
36
- this.nameValue = ValueFactory.newString(name);
37
- this.objectPathValue = ValueFactory.newString(objectPath);
38
- this.elementPathValue = ValueFactory.newString(elementPath);
43
+ this.src = (src == null ? path : src);
44
+
45
+ this.pathValue = ValueFactory.newString(path);
46
+ this.parentPath = parentPath(path);
47
+ this.baseName = baseName(path);
48
+ this.parentPathValue = ValueFactory.newString(parentPath);
49
+ this.baseNameValue = ValueFactory.newString(baseName);
50
+
51
+ this.srcValue = ValueFactory.newString(this.src);
52
+ this.srcParentPath = parentPath(this.src);
53
+ this.srcBaseName = baseName(this.src);
54
+ this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
55
+ this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
56
+
57
+ if (! srcParentPath.equals(parentPath)) {
58
+ throw new ConfigException(String.format("The branch (parent path) of src \"%s\" must be same with of name \"%s\" yet", src, path));
59
+ }
39
60
  }
40
61
 
41
- public String getName()
62
+ public String getPath()
42
63
  {
43
- return name;
64
+ return path;
44
65
  }
45
66
 
46
67
  public Type getType()
@@ -53,32 +74,63 @@ public class JsonColumn
53
74
  return defaultValue;
54
75
  }
55
76
 
56
- public String getObjectPath()
77
+ public String getSrc()
78
+ {
79
+ return src;
80
+ }
81
+
82
+ public StringValue getPathValue()
83
+ {
84
+ return pathValue;
85
+ }
86
+
87
+ public String getParentPath()
88
+ {
89
+ return parentPath;
90
+ }
91
+
92
+ public String getBaseName()
93
+ {
94
+ return baseName;
95
+ }
96
+
97
+ public StringValue getParentPathValue()
98
+ {
99
+ return parentPathValue;
100
+ }
101
+
102
+ public StringValue getBaseNameValue()
103
+ {
104
+ return baseNameValue;
105
+ }
106
+
107
+ public StringValue getSrcValue()
57
108
  {
58
- return objectPath;
109
+ return srcValue;
59
110
  }
60
111
 
61
- public String getElementPath()
112
+ public String getSrcParentPath()
62
113
  {
63
- return elementPath;
114
+ return srcParentPath;
64
115
  }
65
116
 
66
- public StringValue getNameValue()
117
+ public String getSrcBaseName()
67
118
  {
68
- return nameValue;
119
+ return srcBaseName;
69
120
  }
70
121
 
71
- public StringValue getObjectPathValue()
122
+ public StringValue getSrcParentPathValue()
72
123
  {
73
- return objectPathValue;
124
+ return srcParentPathValue;
74
125
  }
75
126
 
76
- public StringValue getElementPathValue()
127
+ public StringValue getSrcBaseNameValue()
77
128
  {
78
- return elementPathValue;
129
+ return srcBaseNameValue;
79
130
  }
80
131
 
81
- public static String objectPath(String path)
132
+ // like File.dirname
133
+ public static String parentPath(String path)
82
134
  {
83
135
  String[] parts = path.split("\\.");
84
136
  StringBuilder builder = new StringBuilder();
@@ -96,7 +148,7 @@ public class JsonColumn
96
148
  return builder.toString();
97
149
  }
98
150
 
99
- public static String elementPath(String path)
151
+ public static String baseName(String path)
100
152
  {
101
153
  String[] parts = path.split("\\.");
102
154
  return parts[parts.length - 1];
@@ -80,29 +80,29 @@ public class JsonVisitor
80
80
 
81
81
  private void jsonColumnsPut(String path, JsonColumn value)
82
82
  {
83
- String objectPath = JsonColumn.objectPath(path);
84
- if (! jsonColumns.containsKey(objectPath)) {
85
- jsonColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
83
+ String parentPath = JsonColumn.parentPath(path);
84
+ if (! jsonColumns.containsKey(parentPath)) {
85
+ jsonColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
86
86
  }
87
- jsonColumns.get(objectPath).put(path, value);
87
+ jsonColumns.get(parentPath).put(path, value);
88
88
  }
89
89
 
90
90
  private void jsonAddColumnsPut(String path, JsonColumn value)
91
91
  {
92
- String objectPath = JsonColumn.objectPath(path);
93
- if (! jsonAddColumns.containsKey(objectPath)) {
94
- jsonAddColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
92
+ String parentPath = JsonColumn.parentPath(path);
93
+ if (! jsonAddColumns.containsKey(parentPath)) {
94
+ jsonAddColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
95
95
  }
96
- jsonAddColumns.get(objectPath).put(path, value);
96
+ jsonAddColumns.get(parentPath).put(path, value);
97
97
  }
98
98
 
99
99
  private void jsonDropColumnsPut(String path)
100
100
  {
101
- String objectPath = JsonColumn.objectPath(path);
102
- if (! jsonDropColumns.containsKey(objectPath)) {
103
- jsonDropColumns.put(objectPath, new HashSet<String>());
101
+ String parentPath = JsonColumn.parentPath(path);
102
+ if (! jsonDropColumns.containsKey(parentPath)) {
103
+ jsonDropColumns.put(parentPath, new HashSet<String>());
104
104
  }
105
- jsonDropColumns.get(objectPath).add(path);
105
+ jsonDropColumns.get(parentPath).add(path);
106
106
  }
107
107
 
108
108
  // build jsonColumns, jsonAddColumns, and jsonDropColumns
@@ -131,7 +131,8 @@ public class JsonVisitor
131
131
  continue;
132
132
  }
133
133
  if (column.getSrc().isPresent()) {
134
- throw new ConfigException(String.format("columns: src is not supported for json path yet: '%s'", name));
134
+ String src = column.getSrc().get();
135
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
135
136
  }
136
137
  else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
137
138
  Type type = column.getType().get();
@@ -154,7 +155,8 @@ public class JsonVisitor
154
155
  continue;
155
156
  }
156
157
  if (column.getSrc().isPresent()) {
157
- throw new ConfigException(String.format("add_columns: src is not supported for json path yet: '%s'", name));
158
+ String src = column.getSrc().get();
159
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
158
160
  }
159
161
  else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
160
162
  Type type = column.getType().get();
@@ -162,7 +164,7 @@ public class JsonVisitor
162
164
  jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
163
165
  }
164
166
  else {
165
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
167
+ throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
166
168
  }
167
169
  }
168
170
  }
@@ -279,15 +281,16 @@ public class JsonVisitor
279
281
  }
280
282
  else if (this.jsonColumns.containsKey(rootPath)) {
281
283
  Map<Value, Value> map = mapValue.map();
282
- for (JsonColumn jsonColumn : jsonColumns.get(rootPath).values()) {
283
- Value k = jsonColumn.getElementPathValue();
284
- Value v = map.get(k);
285
- String newPath = jsonColumn.getName();
284
+ LinkedHashMap<String, JsonColumn> jsonColumns = this.jsonColumns.get(rootPath);
285
+ for (JsonColumn jsonColumn : jsonColumns.values()) {
286
+ Value src = jsonColumn.getSrcBaseNameValue();
287
+ Value v = map.get(src);
288
+ String newPath = jsonColumn.getPath();
286
289
  Value visited = visit(newPath, v);
287
290
  if (visited == null) {
288
291
  visited = jsonColumn.getDefaultValue();
289
292
  }
290
- newValue.add(i++, k);
293
+ newValue.add(i++, jsonColumn.getPathValue());
291
294
  newValue.add(i++, visited);
292
295
  }
293
296
  }
@@ -302,9 +305,16 @@ public class JsonVisitor
302
305
  }
303
306
  }
304
307
  if (this.jsonAddColumns.containsKey(rootPath)) {
305
- for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
306
- newValue.add(i++, jsonColumn.getElementPathValue());
307
- newValue.add(i++, jsonColumn.getDefaultValue());
308
+ Map<Value, Value> map = mapValue.map();
309
+ LinkedHashMap<String, JsonColumn> jsonAddColumns = this.jsonAddColumns.get(rootPath);
310
+ for (JsonColumn jsonColumn : jsonAddColumns.values()) {
311
+ Value src = jsonColumn.getSrcBaseNameValue();
312
+ Value v = map.get(src);
313
+ if (v == null) {
314
+ v = jsonColumn.getDefaultValue();
315
+ }
316
+ newValue.add(i++, jsonColumn.getPathValue());
317
+ newValue.add(i++, v);
308
318
  }
309
319
  }
310
320
  return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0.pre1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-24 00:00:00.000000000 Z
11
+ date: 2016-05-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -57,9 +57,6 @@ files:
57
57
  - example/drop_columns.yml
58
58
  - example/example.csv
59
59
  - example/example.yml
60
- - example/json_add_columns.yml
61
- - example/json_columns.yml
62
- - example/json_drop_columns.yml
63
60
  - gradle/wrapper/gradle-wrapper.jar
64
61
  - gradle/wrapper/gradle-wrapper.properties
65
62
  - gradlew
@@ -71,7 +68,7 @@ files:
71
68
  - src/main/java/org/embulk/filter/column/JsonColumn.java
72
69
  - src/main/java/org/embulk/filter/column/JsonVisitor.java
73
70
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
74
- - classpath/embulk-filter-column-0.5.0.pre1.jar
71
+ - classpath/embulk-filter-column-0.5.0.jar
75
72
  homepage: https://github.com/sonots/embulk-filter-column
76
73
  licenses:
77
74
  - MIT
@@ -87,9 +84,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
87
84
  version: '0'
88
85
  required_rubygems_version: !ruby/object:Gem::Requirement
89
86
  requirements:
90
- - - '>'
87
+ - - '>='
91
88
  - !ruby/object:Gem::Version
92
- version: 1.3.1
89
+ version: '0'
93
90
  requirements: []
94
91
  rubyforge_project:
95
92
  rubygems_version: 2.1.9
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- default_timezone: "Asia/Tokyo"
20
- default_timestamp_format: "%Y-%m-%d"
21
- columns:
22
- - {name: time}
23
- - {name: id}
24
- - {name: name}
25
- - {name: score}
26
- - {name: json, default: "{}"}
27
- add_columns:
28
- - {name: $.json.foo, type: long, default: 1}
29
- - {name: $.json.d, type: string, default: "2015-07-13"}
30
- out:
31
- type: stdout
@@ -1,23 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- columns:
20
- - {name: json, default: "{\"foo\":\"FOO\"}"}
21
- - {name: $.json.foo}
22
- out:
23
- type: stdout
@@ -1,22 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- drop_columns:
20
- - {name: $.json.foo }
21
- out:
22
- type: stdout