embulk-filter-column 0.5.0.pre1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1e88bb8b22f8f2030982764303175dd77b97a42a
4
- data.tar.gz: e7b65b4cc79b92b3aa89596fc3e1253fa7897ace
3
+ metadata.gz: 485925060323de88d131e5d28c6646faa7c1d066
4
+ data.tar.gz: 44ac8c06b82a3d893bb3e8a9f0f1f33ebd847984
5
5
  SHA512:
6
- metadata.gz: 7cc74b699dc85ec17ff45ab2d991f77219b7fa93ef03d2d444b21d36b83c47689b4d08ebe81dfc0e428134f9a380262367fd8d3cd33987b831d8628e64a79713
7
- data.tar.gz: e2cb3d98ec5b678f67d94c8b70854f2b46c5db7544cfbe1f2713653229195c1c57ee2b28c8fe423ab345a01f16b4b7d68337ebfd4c93719512e68c7c35e7a1e2
6
+ metadata.gz: 5952f424c3b9f40cdb0929c63860d6c27faf16c611313ad13f159adf30372e94a7fd0e93b7674f401fb6345829cd357a6d1096f0fb56f391c0d07007385812ec
7
+ data.tar.gz: 8676e815498a52bb0a25b0ccb2918ee08de2a2c8506b1007da03e40bf5a1db5d85a826c21412d645d3455900e5259dbb148b98711d1c4f102457fc41c7527139
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.5.0 (2016-05-31)
2
+
3
+ Enhancements:
4
+
5
+ * Support src (rename or copy columns) for JSONPath (but only partially)
6
+
1
7
  # 0.5.0.pre1 (2016-05-24)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -129,7 +129,8 @@ NOTE:
129
129
 
130
130
  * JSONPath syntax is not fully supported
131
131
  * Embulk's type: json cannot have timestamp column, so `type: timestamp` for `add_columns` or `columns` with default is not available
132
- * `src` for `add_columns` or `columns` is not supported yet
132
+ * `src` (to rename or copy columns) for `add_columns` or `columns` is only partially supported so far
133
+ * the parent path (directory) of the JSONPath must be the same for `name` and `src`; for example, `{name: $.foo.copy, src: $.foo.bar}` works, but `{name: $.foo.copy, src: $.bar.baz}` does not work
133
134
 
134
135
  ## ToDo
135
136
 
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.5.0.pre1"
16
+ version = "0.5.0"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -23,5 +23,8 @@ filters:
23
23
  - {name: d, type: timestamp, default: "2015-07-13"}
24
24
  - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
25
25
  - {name: copy_score, src: score}
26
+ - {name: $.json.foo, type: long, default: 1}
27
+ - {name: $.json.d, type: string, default: "2015-07-13"}
28
+ - {name: $.json.copy_foo, src: $.json.foo}
26
29
  out:
27
30
  type: stdout
data/example/columns.yml CHANGED
@@ -1,10 +1,3 @@
1
- # in:
2
- # type: random
3
- # rows: 100
4
- # schema:
5
- # id: primary_key
6
- # name: string
7
- # score: integer
8
1
  in:
9
2
  type: file
10
3
  path_prefix: example/example.csv
@@ -31,5 +24,6 @@ filters:
31
24
  - {name: copy_score, src: score}
32
25
  - {name: json, default: "{\"foo\":\"FOO\"}"}
33
26
  - {name: $.json.foo}
27
+ - {name: $.json.copy_foo, src: $.json.foo}
34
28
  out:
35
29
  type: stdout
@@ -19,5 +19,6 @@ filters:
19
19
  drop_columns:
20
20
  - {name: time }
21
21
  - {name: id }
22
+ - {name: $.json.foo }
22
23
  out:
23
24
  type: stdout
data/example/example.yml CHANGED
@@ -1,10 +1,3 @@
1
- # in:
2
- # type: random
3
- # rows: 100
4
- # schema:
5
- # id: primary_key
6
- # name: string
7
- # score: integer
8
1
  in:
9
2
  type: file
10
3
  path_prefix: example/example.csv
@@ -31,5 +24,6 @@ filters:
31
24
  - {name: copy_score, src: score}
32
25
  - {name: json, default: "{\"foo\":\"FOO\"}"}
33
26
  - {name: $.json.foo}
27
+ - {name: $.json.copy_foo, src: $.json.foo}
34
28
  out:
35
29
  type: stdout
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
+ import org.embulk.config.ConfigException;
3
4
  import org.embulk.spi.type.Type;
4
5
  import org.msgpack.value.StringValue;
5
6
  import org.msgpack.value.Value;
@@ -7,40 +8,60 @@ import org.msgpack.value.ValueFactory;
7
8
 
8
9
  public class JsonColumn
9
10
  {
10
- private final String name;
11
+ private final String path;
11
12
  private final Type type;
12
13
  private final Value defaultValue;
13
- private String objectPath = null; // object path (like directory) of json path
14
- private String elementPath = null; // element path (like leaf) of json path
15
- private StringValue nameValue = null;
16
- private StringValue objectPathValue = null;
17
- private StringValue elementPathValue = null;
14
+ private final String src;
18
15
 
19
- public JsonColumn(
20
- String name,
21
- Type type)
16
+ private StringValue pathValue = null;
17
+ private String parentPath = null;
18
+ private String baseName = null;
19
+ private StringValue parentPathValue = null;
20
+ private StringValue baseNameValue = null;
21
+
22
+ private StringValue srcValue = null;
23
+ private String srcParentPath = null;
24
+ private String srcBaseName = null;
25
+ private StringValue srcParentPathValue = null;
26
+ private StringValue srcBaseNameValue = null;
27
+
28
+ public JsonColumn(String path, Type type)
22
29
  {
23
- this(name, type, null);
30
+ this(path, type, null, null);
24
31
  }
25
32
 
26
- public JsonColumn(
27
- String name,
28
- Type type,
29
- Value defaultValue)
33
+ public JsonColumn(String path, Type type, Value defaultValue)
30
34
  {
31
- this.name = name;
35
+ this(path, type, defaultValue, null);
36
+ }
37
+
38
+ public JsonColumn(String path, Type type, Value defaultValue, String src)
39
+ {
40
+ this.path = path;
32
41
  this.type = type;
33
42
  this.defaultValue = (defaultValue == null ? ValueFactory.newNil() : defaultValue);
34
- this.objectPath = objectPath(name);
35
- this.elementPath = elementPath(name);
36
- this.nameValue = ValueFactory.newString(name);
37
- this.objectPathValue = ValueFactory.newString(objectPath);
38
- this.elementPathValue = ValueFactory.newString(elementPath);
43
+ this.src = (src == null ? path : src);
44
+
45
+ this.pathValue = ValueFactory.newString(path);
46
+ this.parentPath = parentPath(path);
47
+ this.baseName = baseName(path);
48
+ this.parentPathValue = ValueFactory.newString(parentPath);
49
+ this.baseNameValue = ValueFactory.newString(baseName);
50
+
51
+ this.srcValue = ValueFactory.newString(this.src);
52
+ this.srcParentPath = parentPath(this.src);
53
+ this.srcBaseName = baseName(this.src);
54
+ this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
55
+ this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
56
+
57
+ if (! srcParentPath.equals(parentPath)) {
58
+ throw new ConfigException(String.format("The branch (parent path) of src \"%s\" must be same with of name \"%s\" yet", src, path));
59
+ }
39
60
  }
40
61
 
41
- public String getName()
62
+ public String getPath()
42
63
  {
43
- return name;
64
+ return path;
44
65
  }
45
66
 
46
67
  public Type getType()
@@ -53,32 +74,63 @@ public class JsonColumn
53
74
  return defaultValue;
54
75
  }
55
76
 
56
- public String getObjectPath()
77
+ public String getSrc()
78
+ {
79
+ return src;
80
+ }
81
+
82
+ public StringValue getPathValue()
83
+ {
84
+ return pathValue;
85
+ }
86
+
87
+ public String getParentPath()
88
+ {
89
+ return parentPath;
90
+ }
91
+
92
+ public String getBaseName()
93
+ {
94
+ return baseName;
95
+ }
96
+
97
+ public StringValue getParentPathValue()
98
+ {
99
+ return parentPathValue;
100
+ }
101
+
102
+ public StringValue getBaseNameValue()
103
+ {
104
+ return baseNameValue;
105
+ }
106
+
107
+ public StringValue getSrcValue()
57
108
  {
58
- return objectPath;
109
+ return srcValue;
59
110
  }
60
111
 
61
- public String getElementPath()
112
+ public String getSrcParentPath()
62
113
  {
63
- return elementPath;
114
+ return srcParentPath;
64
115
  }
65
116
 
66
- public StringValue getNameValue()
117
+ public String getSrcBaseName()
67
118
  {
68
- return nameValue;
119
+ return srcBaseName;
69
120
  }
70
121
 
71
- public StringValue getObjectPathValue()
122
+ public StringValue getSrcParentPathValue()
72
123
  {
73
- return objectPathValue;
124
+ return srcParentPathValue;
74
125
  }
75
126
 
76
- public StringValue getElementPathValue()
127
+ public StringValue getSrcBaseNameValue()
77
128
  {
78
- return elementPathValue;
129
+ return srcBaseNameValue;
79
130
  }
80
131
 
81
- public static String objectPath(String path)
132
+ // like File.dirname
133
+ public static String parentPath(String path)
82
134
  {
83
135
  String[] parts = path.split("\\.");
84
136
  StringBuilder builder = new StringBuilder();
@@ -96,7 +148,7 @@ public class JsonColumn
96
148
  return builder.toString();
97
149
  }
98
150
 
99
- public static String elementPath(String path)
151
+ public static String baseName(String path)
100
152
  {
101
153
  String[] parts = path.split("\\.");
102
154
  return parts[parts.length - 1];
@@ -80,29 +80,29 @@ public class JsonVisitor
80
80
 
81
81
  private void jsonColumnsPut(String path, JsonColumn value)
82
82
  {
83
- String objectPath = JsonColumn.objectPath(path);
84
- if (! jsonColumns.containsKey(objectPath)) {
85
- jsonColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
83
+ String parentPath = JsonColumn.parentPath(path);
84
+ if (! jsonColumns.containsKey(parentPath)) {
85
+ jsonColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
86
86
  }
87
- jsonColumns.get(objectPath).put(path, value);
87
+ jsonColumns.get(parentPath).put(path, value);
88
88
  }
89
89
 
90
90
  private void jsonAddColumnsPut(String path, JsonColumn value)
91
91
  {
92
- String objectPath = JsonColumn.objectPath(path);
93
- if (! jsonAddColumns.containsKey(objectPath)) {
94
- jsonAddColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
92
+ String parentPath = JsonColumn.parentPath(path);
93
+ if (! jsonAddColumns.containsKey(parentPath)) {
94
+ jsonAddColumns.put(parentPath, new LinkedHashMap<String, JsonColumn>());
95
95
  }
96
- jsonAddColumns.get(objectPath).put(path, value);
96
+ jsonAddColumns.get(parentPath).put(path, value);
97
97
  }
98
98
 
99
99
  private void jsonDropColumnsPut(String path)
100
100
  {
101
- String objectPath = JsonColumn.objectPath(path);
102
- if (! jsonDropColumns.containsKey(objectPath)) {
103
- jsonDropColumns.put(objectPath, new HashSet<String>());
101
+ String parentPath = JsonColumn.parentPath(path);
102
+ if (! jsonDropColumns.containsKey(parentPath)) {
103
+ jsonDropColumns.put(parentPath, new HashSet<String>());
104
104
  }
105
- jsonDropColumns.get(objectPath).add(path);
105
+ jsonDropColumns.get(parentPath).add(path);
106
106
  }
107
107
 
108
108
  // build jsonColumns, jsonAddColumns, and jsonDropColumns
@@ -131,7 +131,8 @@ public class JsonVisitor
131
131
  continue;
132
132
  }
133
133
  if (column.getSrc().isPresent()) {
134
- throw new ConfigException(String.format("columns: src is not supported for json path yet: '%s'", name));
134
+ String src = column.getSrc().get();
135
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
135
136
  }
136
137
  else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
137
138
  Type type = column.getType().get();
@@ -154,7 +155,8 @@ public class JsonVisitor
154
155
  continue;
155
156
  }
156
157
  if (column.getSrc().isPresent()) {
157
- throw new ConfigException(String.format("add_columns: src is not supported for json path yet: '%s'", name));
158
+ String src = column.getSrc().get();
159
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
158
160
  }
159
161
  else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
160
162
  Type type = column.getType().get();
@@ -162,7 +164,7 @@ public class JsonVisitor
162
164
  jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
163
165
  }
164
166
  else {
165
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
167
+ throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
166
168
  }
167
169
  }
168
170
  }
@@ -279,15 +281,16 @@ public class JsonVisitor
279
281
  }
280
282
  else if (this.jsonColumns.containsKey(rootPath)) {
281
283
  Map<Value, Value> map = mapValue.map();
282
- for (JsonColumn jsonColumn : jsonColumns.get(rootPath).values()) {
283
- Value k = jsonColumn.getElementPathValue();
284
- Value v = map.get(k);
285
- String newPath = jsonColumn.getName();
284
+ LinkedHashMap<String, JsonColumn> jsonColumns = this.jsonColumns.get(rootPath);
285
+ for (JsonColumn jsonColumn : jsonColumns.values()) {
286
+ Value src = jsonColumn.getSrcBaseNameValue();
287
+ Value v = map.get(src);
288
+ String newPath = jsonColumn.getPath();
286
289
  Value visited = visit(newPath, v);
287
290
  if (visited == null) {
288
291
  visited = jsonColumn.getDefaultValue();
289
292
  }
290
- newValue.add(i++, k);
293
+ newValue.add(i++, jsonColumn.getPathValue());
291
294
  newValue.add(i++, visited);
292
295
  }
293
296
  }
@@ -302,9 +305,16 @@ public class JsonVisitor
302
305
  }
303
306
  }
304
307
  if (this.jsonAddColumns.containsKey(rootPath)) {
305
- for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
306
- newValue.add(i++, jsonColumn.getElementPathValue());
307
- newValue.add(i++, jsonColumn.getDefaultValue());
308
+ Map<Value, Value> map = mapValue.map();
309
+ LinkedHashMap<String, JsonColumn> jsonAddColumns = this.jsonAddColumns.get(rootPath);
310
+ for (JsonColumn jsonColumn : jsonAddColumns.values()) {
311
+ Value src = jsonColumn.getSrcBaseNameValue();
312
+ Value v = map.get(src);
313
+ if (v == null) {
314
+ v = jsonColumn.getDefaultValue();
315
+ }
316
+ newValue.add(i++, jsonColumn.getPathValue());
317
+ newValue.add(i++, v);
308
318
  }
309
319
  }
310
320
  return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0.pre1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-24 00:00:00.000000000 Z
11
+ date: 2016-05-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -57,9 +57,6 @@ files:
57
57
  - example/drop_columns.yml
58
58
  - example/example.csv
59
59
  - example/example.yml
60
- - example/json_add_columns.yml
61
- - example/json_columns.yml
62
- - example/json_drop_columns.yml
63
60
  - gradle/wrapper/gradle-wrapper.jar
64
61
  - gradle/wrapper/gradle-wrapper.properties
65
62
  - gradlew
@@ -71,7 +68,7 @@ files:
71
68
  - src/main/java/org/embulk/filter/column/JsonColumn.java
72
69
  - src/main/java/org/embulk/filter/column/JsonVisitor.java
73
70
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
74
- - classpath/embulk-filter-column-0.5.0.pre1.jar
71
+ - classpath/embulk-filter-column-0.5.0.jar
75
72
  homepage: https://github.com/sonots/embulk-filter-column
76
73
  licenses:
77
74
  - MIT
@@ -87,9 +84,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
87
84
  version: '0'
88
85
  required_rubygems_version: !ruby/object:Gem::Requirement
89
86
  requirements:
90
- - - '>'
87
+ - - '>='
91
88
  - !ruby/object:Gem::Version
92
- version: 1.3.1
89
+ version: '0'
93
90
  requirements: []
94
91
  rubyforge_project:
95
92
  rubygems_version: 2.1.9
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- default_timezone: "Asia/Tokyo"
20
- default_timestamp_format: "%Y-%m-%d"
21
- columns:
22
- - {name: time}
23
- - {name: id}
24
- - {name: name}
25
- - {name: score}
26
- - {name: json, default: "{}"}
27
- add_columns:
28
- - {name: $.json.foo, type: long, default: 1}
29
- - {name: $.json.d, type: string, default: "2015-07-13"}
30
- out:
31
- type: stdout
@@ -1,23 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- columns:
20
- - {name: json, default: "{\"foo\":\"FOO\"}"}
21
- - {name: $.json.foo}
22
- out:
23
- type: stdout
@@ -1,22 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: score, type: double}
16
- - {name: json, type: json}
17
- filters:
18
- - type: column
19
- drop_columns:
20
- - {name: $.json.foo }
21
- out:
22
- type: stdout