embulk-filter-typecast 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fa85a581cc7a32dbae99b2fbacac7150ffe930f
4
- data.tar.gz: fd78481a512883f60c0c360cb1ffc08335e70fa5
3
+ metadata.gz: 58d54323014090807f457eec3b84aef29060ddd2
4
+ data.tar.gz: eb7be6477a3a993ce83da5a3e404fa0e7fd3a50d
5
5
  SHA512:
6
- metadata.gz: 2dcc900cd25a80f8cb6226e3669a7a35b04065fc1a925fd0bc55ee03ee19c90315e1bacf2e00fe35713e11dc60a4ce50feba962ad4e919a5322c67f8726c6f5b
7
- data.tar.gz: 0b800b1434d4099d29fbe7429a97fef8a6dc108dddea0672238528edb06b75f656b07e0a5cc95a377ebab7dad0555a55dcd3068504c2a6dffddb38523614885f
6
+ metadata.gz: f19f899b820a7faccca9d261b8cc807ac230321b26b4ccde552e7308c6853bc01a24b7cefde15eef9061f1b06e55f0b1b31e8ff67f29ab83d4f07ac3feb488db
7
+ data.tar.gz: 479bce7de22d8ecefdd6ddde53cd602f44fefdab0394cb0b83df9eb1f67cf2aeb980aa87b24da62f72a3ce47192f13af5ba6b9c1006116fb1da0f64156313ad2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.5 (2016-11-06)
2
+
3
+ Enhancements:
4
+
5
+ * Support jsonpath bracket notation
6
+
1
7
  # 0.1.4 (2016-10-26)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -19,17 +19,23 @@ A filter plugin for Embulk to cast column type.
19
19
 
20
20
  See [example.csv](./example/example.csv) and [example.yml](./example/example.yml).
21
21
 
22
- ## JSONPath (like) name
22
+ ## JSONPath
23
23
 
24
24
  For a `type: json` column, you can specify a [JSONPath](http://goessner.net/articles/JsonPath/) as the column's name:
25
25
 
26
26
  ```
27
- $.payload.key1
28
- $.payload.array[0]
29
- $.payload.array[*]
27
+ name: $.payload.key1
28
+ name: "$.payload.array[0]"
29
+ name: "$.payload.array[*]"
30
+ name: $['payload']['key1.key2']
30
31
  ```
31
32
 
32
- NOTE: JSONPath syntax is not fully supported
33
+ The following JSONPath operators are not supported:
34
+
35
+ * Multiple properties such as `['name','name']`
36
+ * Multiple array indexes such as `[1,2]`
37
+ * Array slice such as `[1:2]`
38
+ * Filter expression such as `[?(<expression>)]`
33
39
 
34
40
  ## ToDo
35
41
 
data/build.gradle CHANGED
@@ -13,13 +13,14 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.4"
16
+ version = "0.1.5"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.8.+"
22
22
  provided "org.embulk:embulk-core:0.8.+"
23
+ compile "io.github.medjed:JsonPathCompiler:0.1.1"
23
24
 
24
25
  testCompile "junit:junit:4.+"
25
26
  testCompile "org.embulk:embulk-core:0.8.+:tests"
@@ -0,0 +1,41 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: ''
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: timestamp, type: string}
13
+ - {name: "null", type: string}
14
+ - {name: long, type: string}
15
+ - {name: string, type: string}
16
+ - {name: double, type: string}
17
+ - {name: json1, type: string}
18
+ - {name: json2, type: string}
19
+ - {name: array_str, type: string}
20
+ - {name: array_int, type: string}
21
+ - {name: ignore, type: string}
22
+ - {name: boolean, type: string}
23
+ filters:
24
+ - type: typecast
25
+ columns:
26
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
27
+ - {name: "null", type: long}
28
+ - {name: long, type: long}
29
+ - {name: string, type: string}
30
+ - {name: double, type: double}
31
+ - {name: json1, type: json}
32
+ - {name: json2, type: json}
33
+ - {name: array_str, type: json}
34
+ - {name: array_int, type: json}
35
+ - {name: boolean, type: boolean}
36
+ - {name: "$.json1.string", type: long}
37
+ - {name: "$.json2.long", type: long}
38
+ - {name: "$.array_str[0]", type: long}
39
+ - {name: "$.array_int[*]", type: long}
40
+ out:
41
+ type: "null"
@@ -1,5 +1,9 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Utils;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
7
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
4
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
5
9
 
@@ -61,7 +65,7 @@ class ColumnCaster
61
65
  {
62
66
  // columnName => TimestampParser
63
67
  for (ColumnConfig columnConfig : task.getColumns()) {
64
- if (columnConfig.getName().startsWith("$.")) {
68
+ if (PathCompiler.isProbablyJsonPath(columnConfig.getName())) {
65
69
  continue; // type: json columns do not support type: timestamp
66
70
  }
67
71
  Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
@@ -76,7 +80,7 @@ class ColumnCaster
76
80
  {
77
81
  // columnName => TimestampFormatter
78
82
  for (ColumnConfig columnConfig : task.getColumns()) {
79
- if (columnConfig.getName().startsWith("$.")) {
83
+ if (PathCompiler.isProbablyJsonPath(columnConfig.getName())) {
80
84
  continue; // type: json columns do not have type: timestamp
81
85
  }
82
86
  Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
@@ -200,7 +204,8 @@ class ColumnCaster
200
204
  }
201
205
  else if (outputType instanceof JsonType) {
202
206
  Value jsonValue = StringCast.asJson(value);
203
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
207
+ String name = outputColumn.getName();
208
+ String jsonPath = new StringBuilder("$").append(PropertyPathToken.getPathFragment(name)).toString();
204
209
  Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
205
210
  pageBuilder.setJson(outputColumn, castedValue);
206
211
  }
@@ -238,7 +243,8 @@ class ColumnCaster
238
243
 
239
244
  public void setFromJson(Column outputColumn, Value value)
240
245
  {
241
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
246
+ String name = outputColumn.getName();
247
+ String jsonPath = new StringBuilder("$").append(PropertyPathToken.getPathFragment(name)).toString();
242
248
  Value castedValue = jsonVisitor.visit(jsonPath, value);
243
249
  Type outputType = outputColumn.getType();
244
250
  if (outputType instanceof BooleanType) {
@@ -0,0 +1,78 @@
1
+ package org.embulk.filter.typecast;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ private JsonPathUtil() {}
18
+
19
+ public static String getColumnName(String jsonPath)
20
+ {
21
+ Path compiledPath;
22
+ try {
23
+ compiledPath = PathCompiler.compile(jsonPath);
24
+ }
25
+ catch (InvalidPathException e) {
26
+ throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
27
+ }
28
+ PathToken pathToken = compiledPath.getRoot();
29
+ pathToken = pathToken.next(); // skip $
30
+ return ((PropertyPathToken) pathToken).getProperties().get(0);
31
+ }
32
+
33
+ public static void assertJsonPathFormat(String path)
34
+ {
35
+ Path compiledPath;
36
+ try {
37
+ compiledPath = PathCompiler.compile(path);
38
+ }
39
+ catch (InvalidPathException e) {
40
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
41
+ }
42
+ PathToken pathToken = compiledPath.getRoot();
43
+ while (true) {
44
+ assertSupportedPathToken(pathToken, path);
45
+ if (pathToken.isLeaf()) {
46
+ break;
47
+ }
48
+ pathToken = pathToken.next();
49
+ }
50
+ }
51
+
52
+ protected static void assertSupportedPathToken(PathToken pathToken, String path)
53
+ {
54
+ if (pathToken instanceof ArrayPathToken) {
55
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
56
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
57
+ }
58
+ else if (pathToken instanceof ScanPathToken) {
59
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
60
+ }
61
+ else if (pathToken instanceof FunctionPathToken) {
62
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
63
+ }
64
+ else if (pathToken instanceof PredicatePathToken) {
65
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
66
+ }
67
+ }
68
+
69
+ protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
70
+ {
71
+ if (arrayIndexOperation == null) {
72
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
73
+ }
74
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
75
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
76
+ }
77
+ }
78
+ }
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
4
9
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
5
10
 
@@ -33,20 +38,33 @@ public class JsonVisitor
33
38
  this.inputSchema = inputSchema;
34
39
  this.outputSchema = outputSchema;
35
40
 
41
+ assertJsonPathFromat();
36
42
  buildShouldVisitSet();
37
43
  buildJsonPathTypeMap();
38
44
  }
39
45
 
46
+ private void assertJsonPathFromat()
47
+ {
48
+ for (ColumnConfig columnConfig : task.getColumns()) {
49
+ String name = columnConfig.getName();
50
+ if (! PathCompiler.isProbablyJsonPath(name)) {
51
+ continue;
52
+ }
53
+ JsonPathUtil.assertJsonPathFormat(name);
54
+ }
55
+ }
56
+
40
57
  private void buildJsonPathTypeMap()
41
58
  {
42
59
  // json path => Type
43
60
  for (ColumnConfig columnConfig : task.getColumns()) {
44
61
  String name = columnConfig.getName();
45
- if (!name.startsWith("$.")) {
62
+ if (! PathCompiler.isProbablyJsonPath(name)) {
46
63
  continue;
47
64
  }
65
+ Path compiledPath = PathCompiler.compile(name);
48
66
  Type type = columnConfig.getType();
49
- this.jsonPathTypeMap.put(name, type);
67
+ this.jsonPathTypeMap.put(compiledPath.toString(), type);
50
68
  }
51
69
  }
52
70
 
@@ -55,26 +73,15 @@ public class JsonVisitor
55
73
  // json partial path => Boolean to avoid unnecessary type: json visit
56
74
  for (ColumnConfig columnConfig : task.getColumns()) {
57
75
  String name = columnConfig.getName();
58
- if (!name.startsWith("$.")) {
76
+ if (! PathCompiler.isProbablyJsonPath(name)) {
59
77
  continue;
60
78
  }
61
- String[] parts = name.split("\\.");
79
+ PathToken parts = PathCompiler.compile(name).getRoot();
62
80
  StringBuilder partialPath = new StringBuilder("$");
63
- for (int i = 1; i < parts.length; i++) {
64
- if (parts[i].contains("[")) {
65
- String[] arrayParts = parts[i].split("\\[");
66
- partialPath.append(".").append(arrayParts[0]);
67
- this.shouldVisitSet.add(partialPath.toString());
68
- for (int j = 1; j < arrayParts.length; j++) {
69
- // Supports both [0] and [*]
70
- partialPath.append("[").append(arrayParts[j]);
71
- this.shouldVisitSet.add(partialPath.toString());
72
- }
73
- }
74
- else {
75
- partialPath.append(".").append(parts[i]);
76
- this.shouldVisitSet.add(partialPath.toString());
77
- }
81
+ while (! parts.isLeaf()) {
82
+ parts = parts.next(); // first next() skips "$"
83
+ partialPath.append(parts.getPathFragment());
84
+ this.shouldVisitSet.add(partialPath.toString());
78
85
  }
79
86
  }
80
87
  }
@@ -118,7 +125,8 @@ public class JsonVisitor
118
125
  int size = arrayValue.size();
119
126
  Value[] newValue = new Value[size];
120
127
  for (int i = 0; i < size; i++) {
121
- String k = new StringBuilder(rootPath).append("[").append(Integer.toString(i)).append("]").toString();
128
+ String pathFragment = ArrayPathToken.getPathFragment(i);
129
+ String k = new StringBuilder(rootPath).append(pathFragment).toString();
122
130
  if (!shouldVisit(k)) {
123
131
  k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
124
132
  }
@@ -135,7 +143,8 @@ public class JsonVisitor
135
143
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
136
144
  Value k = entry.getKey();
137
145
  Value v = entry.getValue();
138
- String newPath = new StringBuilder(rootPath).append(".").append(k.asStringValue().asString()).toString();
146
+ String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
147
+ String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
139
148
  Value r = visit(newPath, v);
140
149
  newValue[i++] = k;
141
150
  newValue[i++] = r;
@@ -2,6 +2,7 @@ package org.embulk.filter.typecast;
2
2
 
3
3
  import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
5
6
  import org.embulk.config.Config;
6
7
  import org.embulk.config.ConfigDefault;
7
8
  import org.embulk.config.ConfigException;
@@ -91,10 +92,10 @@ public class TypecastFilterPlugin implements FilterPlugin
91
92
  // throw if column does not exist
92
93
  for (ColumnConfig columnConfig : columnConfigs) {
93
94
  String name = columnConfig.getName();
94
- if (name.startsWith("$.")) { // check only top level column name
95
- String firstName = name.split("\\.", 3)[1];
96
- String firstNameWithoutArray = firstName.split("\\[")[0];
97
- inputSchema.lookupColumn(firstNameWithoutArray);
95
+ if (PathCompiler.isProbablyJsonPath(name)) {
96
+ // check only top level column name
97
+ String columnName = JsonPathUtil.getColumnName(name);
98
+ inputSchema.lookupColumn(columnName);
98
99
  }
99
100
  else {
100
101
  inputSchema.lookupColumn(name);
@@ -103,7 +104,7 @@ public class TypecastFilterPlugin implements FilterPlugin
103
104
  // throw if timestamp is specified in json path
104
105
  for (ColumnConfig columnConfig : columnConfigs) {
105
106
  String name = columnConfig.getName();
106
- if (name.startsWith("$.") && columnConfig.getType() instanceof TimestampType) {
107
+ if (PathCompiler.isProbablyJsonPath(name) && columnConfig.getType() instanceof TimestampType) {
107
108
  throw new ConfigException(String.format("embulk-filter-typecast: timestamp type is not supported in json column: \"%s\"", name));
108
109
  }
109
110
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-typecast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-26 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -54,6 +54,7 @@ files:
54
54
  - config/checkstyle/checkstyle.xml
55
55
  - example/empty.yml
56
56
  - example/example.csv
57
+ - example/example.yml
57
58
  - example/from_string.txt
58
59
  - example/from_string.yml
59
60
  - example/jsoncast.json
@@ -70,6 +71,7 @@ files:
70
71
  - src/main/java/org/embulk/filter/typecast/ColumnCaster.java
71
72
  - src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
72
73
  - src/main/java/org/embulk/filter/typecast/JsonCaster.java
74
+ - src/main/java/org/embulk/filter/typecast/JsonPathUtil.java
73
75
  - src/main/java/org/embulk/filter/typecast/JsonVisitor.java
74
76
  - src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java
75
77
  - src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java
@@ -84,7 +86,13 @@ files:
84
86
  - src/test/java/org/embulk/filter/typecast/cast/TestLongCast.java
85
87
  - src/test/java/org/embulk/filter/typecast/cast/TestStringCast.java
86
88
  - src/test/java/org/embulk/filter/typecast/cast/TestTimestampCast.java
87
- - classpath/embulk-filter-typecast-0.1.4.jar
89
+ - classpath/accessors-smart-1.1.jar
90
+ - classpath/asm-5.0.3.jar
91
+ - classpath/commons-lang3-3.4.jar
92
+ - classpath/embulk-filter-typecast-0.1.5.jar
93
+ - classpath/json-smart-2.2.1.jar
94
+ - classpath/JsonPathCompiler-0.1.1.jar
95
+ - classpath/slf4j-api-1.7.21.jar
88
96
  homepage: https://github.com/sonots/embulk-filter-typecast
89
97
  licenses:
90
98
  - MIT