embulk-filter-typecast 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fa85a581cc7a32dbae99b2fbacac7150ffe930f
4
- data.tar.gz: fd78481a512883f60c0c360cb1ffc08335e70fa5
3
+ metadata.gz: 58d54323014090807f457eec3b84aef29060ddd2
4
+ data.tar.gz: eb7be6477a3a993ce83da5a3e404fa0e7fd3a50d
5
5
  SHA512:
6
- metadata.gz: 2dcc900cd25a80f8cb6226e3669a7a35b04065fc1a925fd0bc55ee03ee19c90315e1bacf2e00fe35713e11dc60a4ce50feba962ad4e919a5322c67f8726c6f5b
7
- data.tar.gz: 0b800b1434d4099d29fbe7429a97fef8a6dc108dddea0672238528edb06b75f656b07e0a5cc95a377ebab7dad0555a55dcd3068504c2a6dffddb38523614885f
6
+ metadata.gz: f19f899b820a7faccca9d261b8cc807ac230321b26b4ccde552e7308c6853bc01a24b7cefde15eef9061f1b06e55f0b1b31e8ff67f29ab83d4f07ac3feb488db
7
+ data.tar.gz: 479bce7de22d8ecefdd6ddde53cd602f44fefdab0394cb0b83df9eb1f67cf2aeb980aa87b24da62f72a3ce47192f13af5ba6b9c1006116fb1da0f64156313ad2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.5 (2016-11-06)
2
+
3
+ Enhancements:
4
+
5
+ * Support jsonpath bracket notation
6
+
1
7
  # 0.1.4 (2016-10-26)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -19,17 +19,23 @@ A filter plugin for Embulk to cast column type.
19
19
 
20
20
  See [example.csv](./example/example.csv) and [example.yml](./example/example.yml).
21
21
 
22
- ## JSONPath (like) name
22
+ ## JSONPath
23
23
 
24
24
  For a `type: json` column, you can specify a [JSONPath](http://goessner.net/articles/JsonPath/) as the column's name:
25
25
 
26
26
  ```
27
- $.payload.key1
28
- $.payload.array[0]
29
- $.payload.array[*]
27
+ name: $.payload.key1
28
+ name: "$.payload.array[0]"
29
+ name: "$.payload.array[*]"
30
+ name: $['payload']['key1.key2']
30
31
  ```
31
32
 
32
- NOTE: JSONPath syntax is not fully supported
33
+ The following JSONPath operators are not supported:
34
+
35
+ * Multiple properties such as `['name','name']`
36
+ * Multiple array indexes such as `[1,2]`
37
+ * Array slice such as `[1:2]`
38
+ * Filter expression such as `[?(<expression>)]`
33
39
 
34
40
  ## ToDo
35
41
 
data/build.gradle CHANGED
@@ -13,13 +13,14 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.4"
16
+ version = "0.1.5"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.8.+"
22
22
  provided "org.embulk:embulk-core:0.8.+"
23
+ compile "io.github.medjed:JsonPathCompiler:0.1.1"
23
24
 
24
25
  testCompile "junit:junit:4.+"
25
26
  testCompile "org.embulk:embulk-core:0.8.+:tests"
@@ -0,0 +1,41 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: ''
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: timestamp, type: string}
13
+ - {name: "null", type: string}
14
+ - {name: long, type: string}
15
+ - {name: string, type: string}
16
+ - {name: double, type: string}
17
+ - {name: json1, type: string}
18
+ - {name: json2, type: string}
19
+ - {name: array_str, type: string}
20
+ - {name: array_int, type: string}
21
+ - {name: ignore, type: string}
22
+ - {name: boolean, type: string}
23
+ filters:
24
+ - type: typecast
25
+ columns:
26
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
27
+ - {name: "null", type: long}
28
+ - {name: long, type: long}
29
+ - {name: string, type: string}
30
+ - {name: double, type: double}
31
+ - {name: json1, type: json}
32
+ - {name: json2, type: json}
33
+ - {name: array_str, type: json}
34
+ - {name: array_int, type: json}
35
+ - {name: boolean, type: boolean}
36
+ - {name: "$.json1.string", type: long}
37
+ - {name: "$.json2.long", type: long}
38
+ - {name: "$.array_str[0]", type: long}
39
+ - {name: "$.array_int[*]", type: long}
40
+ out:
41
+ type: "null"
@@ -1,5 +1,9 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Utils;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
7
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
4
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
5
9
 
@@ -61,7 +65,7 @@ class ColumnCaster
61
65
  {
62
66
  // columnName => TimestampParser
63
67
  for (ColumnConfig columnConfig : task.getColumns()) {
64
- if (columnConfig.getName().startsWith("$.")) {
68
+ if (PathCompiler.isProbablyJsonPath(columnConfig.getName())) {
65
69
  continue; // type: json columns do not support type: timestamp
66
70
  }
67
71
  Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
@@ -76,7 +80,7 @@ class ColumnCaster
76
80
  {
77
81
  // columnName => TimestampFormatter
78
82
  for (ColumnConfig columnConfig : task.getColumns()) {
79
- if (columnConfig.getName().startsWith("$.")) {
83
+ if (PathCompiler.isProbablyJsonPath(columnConfig.getName())) {
80
84
  continue; // type: json columns do not have type: timestamp
81
85
  }
82
86
  Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
@@ -200,7 +204,8 @@ class ColumnCaster
200
204
  }
201
205
  else if (outputType instanceof JsonType) {
202
206
  Value jsonValue = StringCast.asJson(value);
203
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
207
+ String name = outputColumn.getName();
208
+ String jsonPath = new StringBuilder("$").append(PropertyPathToken.getPathFragment(name)).toString();
204
209
  Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
205
210
  pageBuilder.setJson(outputColumn, castedValue);
206
211
  }
@@ -238,7 +243,8 @@ class ColumnCaster
238
243
 
239
244
  public void setFromJson(Column outputColumn, Value value)
240
245
  {
241
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
246
+ String name = outputColumn.getName();
247
+ String jsonPath = new StringBuilder("$").append(PropertyPathToken.getPathFragment(name)).toString();
242
248
  Value castedValue = jsonVisitor.visit(jsonPath, value);
243
249
  Type outputType = outputColumn.getType();
244
250
  if (outputType instanceof BooleanType) {
@@ -0,0 +1,78 @@
1
+ package org.embulk.filter.typecast;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ private JsonPathUtil() {}
18
+
19
+ public static String getColumnName(String jsonPath)
20
+ {
21
+ Path compiledPath;
22
+ try {
23
+ compiledPath = PathCompiler.compile(jsonPath);
24
+ }
25
+ catch (InvalidPathException e) {
26
+ throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
27
+ }
28
+ PathToken pathToken = compiledPath.getRoot();
29
+ pathToken = pathToken.next(); // skip $
30
+ return ((PropertyPathToken) pathToken).getProperties().get(0);
31
+ }
32
+
33
+ public static void assertJsonPathFormat(String path)
34
+ {
35
+ Path compiledPath;
36
+ try {
37
+ compiledPath = PathCompiler.compile(path);
38
+ }
39
+ catch (InvalidPathException e) {
40
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
41
+ }
42
+ PathToken pathToken = compiledPath.getRoot();
43
+ while (true) {
44
+ assertSupportedPathToken(pathToken, path);
45
+ if (pathToken.isLeaf()) {
46
+ break;
47
+ }
48
+ pathToken = pathToken.next();
49
+ }
50
+ }
51
+
52
+ protected static void assertSupportedPathToken(PathToken pathToken, String path)
53
+ {
54
+ if (pathToken instanceof ArrayPathToken) {
55
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
56
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
57
+ }
58
+ else if (pathToken instanceof ScanPathToken) {
59
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
60
+ }
61
+ else if (pathToken instanceof FunctionPathToken) {
62
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
63
+ }
64
+ else if (pathToken instanceof PredicatePathToken) {
65
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
66
+ }
67
+ }
68
+
69
+ protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
70
+ {
71
+ if (arrayIndexOperation == null) {
72
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
73
+ }
74
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
75
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
76
+ }
77
+ }
78
+ }
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
4
9
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
5
10
 
@@ -33,20 +38,33 @@ public class JsonVisitor
33
38
  this.inputSchema = inputSchema;
34
39
  this.outputSchema = outputSchema;
35
40
 
41
+ assertJsonPathFromat();
36
42
  buildShouldVisitSet();
37
43
  buildJsonPathTypeMap();
38
44
  }
39
45
 
46
+ private void assertJsonPathFromat()
47
+ {
48
+ for (ColumnConfig columnConfig : task.getColumns()) {
49
+ String name = columnConfig.getName();
50
+ if (! PathCompiler.isProbablyJsonPath(name)) {
51
+ continue;
52
+ }
53
+ JsonPathUtil.assertJsonPathFormat(name);
54
+ }
55
+ }
56
+
40
57
  private void buildJsonPathTypeMap()
41
58
  {
42
59
  // json path => Type
43
60
  for (ColumnConfig columnConfig : task.getColumns()) {
44
61
  String name = columnConfig.getName();
45
- if (!name.startsWith("$.")) {
62
+ if (! PathCompiler.isProbablyJsonPath(name)) {
46
63
  continue;
47
64
  }
65
+ Path compiledPath = PathCompiler.compile(name);
48
66
  Type type = columnConfig.getType();
49
- this.jsonPathTypeMap.put(name, type);
67
+ this.jsonPathTypeMap.put(compiledPath.toString(), type);
50
68
  }
51
69
  }
52
70
 
@@ -55,26 +73,15 @@ public class JsonVisitor
55
73
  // json partial path => Boolean to avoid unnecessary type: json visit
56
74
  for (ColumnConfig columnConfig : task.getColumns()) {
57
75
  String name = columnConfig.getName();
58
- if (!name.startsWith("$.")) {
76
+ if (! PathCompiler.isProbablyJsonPath(name)) {
59
77
  continue;
60
78
  }
61
- String[] parts = name.split("\\.");
79
+ PathToken parts = PathCompiler.compile(name).getRoot();
62
80
  StringBuilder partialPath = new StringBuilder("$");
63
- for (int i = 1; i < parts.length; i++) {
64
- if (parts[i].contains("[")) {
65
- String[] arrayParts = parts[i].split("\\[");
66
- partialPath.append(".").append(arrayParts[0]);
67
- this.shouldVisitSet.add(partialPath.toString());
68
- for (int j = 1; j < arrayParts.length; j++) {
69
- // Supports both [0] and [*]
70
- partialPath.append("[").append(arrayParts[j]);
71
- this.shouldVisitSet.add(partialPath.toString());
72
- }
73
- }
74
- else {
75
- partialPath.append(".").append(parts[i]);
76
- this.shouldVisitSet.add(partialPath.toString());
77
- }
81
+ while (! parts.isLeaf()) {
82
+ parts = parts.next(); // first next() skips "$"
83
+ partialPath.append(parts.getPathFragment());
84
+ this.shouldVisitSet.add(partialPath.toString());
78
85
  }
79
86
  }
80
87
  }
@@ -118,7 +125,8 @@ public class JsonVisitor
118
125
  int size = arrayValue.size();
119
126
  Value[] newValue = new Value[size];
120
127
  for (int i = 0; i < size; i++) {
121
- String k = new StringBuilder(rootPath).append("[").append(Integer.toString(i)).append("]").toString();
128
+ String pathFragment = ArrayPathToken.getPathFragment(i);
129
+ String k = new StringBuilder(rootPath).append(pathFragment).toString();
122
130
  if (!shouldVisit(k)) {
123
131
  k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
124
132
  }
@@ -135,7 +143,8 @@ public class JsonVisitor
135
143
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
136
144
  Value k = entry.getKey();
137
145
  Value v = entry.getValue();
138
- String newPath = new StringBuilder(rootPath).append(".").append(k.asStringValue().asString()).toString();
146
+ String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
147
+ String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
139
148
  Value r = visit(newPath, v);
140
149
  newValue[i++] = k;
141
150
  newValue[i++] = r;
@@ -2,6 +2,7 @@ package org.embulk.filter.typecast;
2
2
 
3
3
  import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
5
6
  import org.embulk.config.Config;
6
7
  import org.embulk.config.ConfigDefault;
7
8
  import org.embulk.config.ConfigException;
@@ -91,10 +92,10 @@ public class TypecastFilterPlugin implements FilterPlugin
91
92
  // throw if column does not exist
92
93
  for (ColumnConfig columnConfig : columnConfigs) {
93
94
  String name = columnConfig.getName();
94
- if (name.startsWith("$.")) { // check only top level column name
95
- String firstName = name.split("\\.", 3)[1];
96
- String firstNameWithoutArray = firstName.split("\\[")[0];
97
- inputSchema.lookupColumn(firstNameWithoutArray);
95
+ if (PathCompiler.isProbablyJsonPath(name)) {
96
+ // check only top level column name
97
+ String columnName = JsonPathUtil.getColumnName(name);
98
+ inputSchema.lookupColumn(columnName);
98
99
  }
99
100
  else {
100
101
  inputSchema.lookupColumn(name);
@@ -103,7 +104,7 @@ public class TypecastFilterPlugin implements FilterPlugin
103
104
  // throw if timestamp is specified in json path
104
105
  for (ColumnConfig columnConfig : columnConfigs) {
105
106
  String name = columnConfig.getName();
106
- if (name.startsWith("$.") && columnConfig.getType() instanceof TimestampType) {
107
+ if (PathCompiler.isProbablyJsonPath(name) && columnConfig.getType() instanceof TimestampType) {
107
108
  throw new ConfigException(String.format("embulk-filter-typecast: timestamp type is not supported in json column: \"%s\"", name));
108
109
  }
109
110
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-typecast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-26 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -54,6 +54,7 @@ files:
54
54
  - config/checkstyle/checkstyle.xml
55
55
  - example/empty.yml
56
56
  - example/example.csv
57
+ - example/example.yml
57
58
  - example/from_string.txt
58
59
  - example/from_string.yml
59
60
  - example/jsoncast.json
@@ -70,6 +71,7 @@ files:
70
71
  - src/main/java/org/embulk/filter/typecast/ColumnCaster.java
71
72
  - src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
72
73
  - src/main/java/org/embulk/filter/typecast/JsonCaster.java
74
+ - src/main/java/org/embulk/filter/typecast/JsonPathUtil.java
73
75
  - src/main/java/org/embulk/filter/typecast/JsonVisitor.java
74
76
  - src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java
75
77
  - src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java
@@ -84,7 +86,13 @@ files:
84
86
  - src/test/java/org/embulk/filter/typecast/cast/TestLongCast.java
85
87
  - src/test/java/org/embulk/filter/typecast/cast/TestStringCast.java
86
88
  - src/test/java/org/embulk/filter/typecast/cast/TestTimestampCast.java
87
- - classpath/embulk-filter-typecast-0.1.4.jar
89
+ - classpath/accessors-smart-1.1.jar
90
+ - classpath/asm-5.0.3.jar
91
+ - classpath/commons-lang3-3.4.jar
92
+ - classpath/embulk-filter-typecast-0.1.5.jar
93
+ - classpath/json-smart-2.2.1.jar
94
+ - classpath/JsonPathCompiler-0.1.1.jar
95
+ - classpath/slf4j-api-1.7.21.jar
88
96
  homepage: https://github.com/sonots/embulk-filter-typecast
89
97
  licenses:
90
98
  - MIT