embulk-filter-expand_json 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7c9576abd91635ad11888a88fba5f14e576383e
4
- data.tar.gz: 6a80bd358b9056620075fc16f1f7ae59a5a911c9
3
+ metadata.gz: 07943e1a2bf6a447bccb93cbd9ec0412f00382f0
4
+ data.tar.gz: 0051fdf7de1702ddd169afb6d9a4fe9ce53e0ded
5
5
  SHA512:
6
- metadata.gz: 63ae1b2d8c541497ba9bc0c892a403b30c32e6ac8222eb6bc70d09675c0fc9ec2e26a6b5fbd9443e7d5e2ee479b6bbaa8efaf8d8e1f4995b839f9fd1d4600089
7
- data.tar.gz: 73e53f220990bfe91249eceef14450039c29461a20821963e281d99462fb76fdf6583784f4270301c0a46f6b805259143dd1473d787f21a9c18306718885f6d8
6
+ metadata.gz: 860428fccd8b863e1070ef0e23f657c88e5312031aa222588e51b20c7caa394b2425f26dfc3322fdf69dbc0d3984ff524e3969666cb9a53c68672ead92fd8c25
7
+ data.tar.gz: d862d2cd98f348242f78a04d5cc2d4117cd4d3923428e181a40b778b509aaf37630abe97158a0f1a8d9351bffcd990aa8675ee5076f522575d86972c8f4bf6d2
@@ -0,0 +1,6 @@
1
+ 0.0.5 (2016-03-04)
2
+ ==================
3
+ - [Fix] Avoid `NullPointerException` when the column specified by the `json_column_name` option does not exist in the input schema.
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/6 from @muga
5
+ - [Fix] Migrate to Embulk v0.8.x
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/7
data/README.md CHANGED
@@ -63,3 +63,7 @@ filters:
63
63
  ```
64
64
  $ ./gradlew gem # -t to watch change of files and rebuild continuously
65
65
  ```
66
+
67
+ ## Contributors
68
+ - @Civitaspo
69
+ - @muga
@@ -4,6 +4,7 @@ plugins {
4
4
  id "com.github.kt3k.coveralls" version "2.4.0"
5
5
  id "jacoco"
6
6
  id "java"
7
+ id "checkstyle"
7
8
  }
8
9
  import com.github.jrubygradle.JRubyExec
9
10
  repositories {
@@ -14,16 +15,16 @@ configurations {
14
15
  provided
15
16
  }
16
17
 
17
- version = "0.0.3"
18
+ version = "0.0.5"
18
19
  sourceCompatibility = 1.7
19
20
  targetCompatibility = 1.7
20
21
 
21
22
  dependencies {
22
- compile "org.embulk:embulk-core:0.7.+"
23
- provided "org.embulk:embulk-core:0.7.+"
23
+ compile "org.embulk:embulk-core:0.8.6"
24
+ provided "org.embulk:embulk-core:0.8.6"
24
25
  compile "com.jayway.jsonpath:json-path:2.+"
25
26
  testCompile "junit:junit:4.+"
26
- testCompile "org.embulk:embulk-core:0.7.+:tests"
27
+ testCompile "org.embulk:embulk-core:0.8.6:tests"
27
28
  }
28
29
 
29
30
  jacocoTestReport {
@@ -40,6 +41,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
40
41
  }
41
42
  clean { delete "classpath" }
42
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
60
+
43
61
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
44
62
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
45
63
  script "${project.name}.gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml that omits some of its stricter checks
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -46,6 +46,10 @@ public class ExpandJsonFilterPlugin
46
46
  FilterPlugin.Control control)
47
47
  {
48
48
  PluginTask task = config.loadConfig(PluginTask.class);
49
+
50
+ // check if the specified json column exists or not
51
+ inputSchema.lookupColumn(task.getJsonColumnName());
52
+
49
53
  Schema outputSchema = buildOutputSchema(task, inputSchema);
50
54
  control.run(task.dump(), outputSchema);
51
55
  }
@@ -65,14 +69,16 @@ public class ExpandJsonFilterPlugin
65
69
  int i = 0; // columns index
66
70
  for (Column inputColumn: inputSchema.getColumns()) {
67
71
  if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
68
- logger.info("removed column: name: {}, type: {}",
72
+ logger.info("removed column: name: {}, type: {}, index: {}",
69
73
  inputColumn.getName(),
70
- inputColumn.getType());
74
+ inputColumn.getType(),
75
+ inputColumn.getIndex());
71
76
  for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
72
- logger.info("added column: name: {}, type: {}, options: {}",
77
+ logger.info("added column: name: {}, type: {}, options: {}, index: {}",
73
78
  expandedColumnConfig.getName(),
74
79
  expandedColumnConfig.getType(),
75
- expandedColumnConfig.getOption());
80
+ expandedColumnConfig.getOption(),
81
+ i);
76
82
  Column outputColumn = new Column(i++,
77
83
  expandedColumnConfig.getName(),
78
84
  expandedColumnConfig.getType());
@@ -80,6 +86,10 @@ public class ExpandJsonFilterPlugin
80
86
  }
81
87
  }
82
88
  else {
89
+ logger.info("unchanged column: name: {}, type: {}, index: {}",
90
+ inputColumn.getName(),
91
+ inputColumn.getType(),
92
+ i);
83
93
  Column outputColumn = new Column(i++,
84
94
  inputColumn.getName(),
85
95
  inputColumn.getType());
@@ -2,12 +2,13 @@ package org.embulk.filter.expand_json;
2
2
 
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.ObjectMapper;
5
+ import com.google.common.base.Optional;
5
6
  import com.google.common.base.Throwables;
6
7
  import com.google.common.collect.ImmutableList;
7
- import com.google.common.collect.Maps;
8
8
  import com.jayway.jsonpath.Configuration;
9
9
  import com.jayway.jsonpath.JsonPath;
10
10
  import com.jayway.jsonpath.Option;
11
+ import com.jayway.jsonpath.ParseContext;
11
12
  import com.jayway.jsonpath.ReadContext;
12
13
  import org.embulk.spi.Column;
13
14
  import org.embulk.spi.ColumnConfig;
@@ -22,7 +23,6 @@ import org.embulk.spi.type.Types;
22
23
  import org.joda.time.DateTimeZone;
23
24
  import org.slf4j.Logger;
24
25
 
25
- import java.util.HashMap;
26
26
  import java.util.List;
27
27
  import java.util.Map;
28
28
 
@@ -34,49 +34,167 @@ import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
34
34
  public class FilteredPageOutput
35
35
  implements PageOutput
36
36
  {
37
+ private class ExpandedColumn
38
+ {
39
+ private final String key;
40
+ private final Column column;
41
+ private final String jsonPath;
42
+ private final Optional<TimestampParser> timestampParser;
43
+
44
+ ExpandedColumn(String key,
45
+ Column column,
46
+ String jsonPath,
47
+ Optional<TimestampParser> timestampParser)
48
+ {
49
+ this.key = key;
50
+ this.column = column;
51
+ this.jsonPath = jsonPath;
52
+ this.timestampParser = timestampParser;
53
+ }
54
+
55
+ public String getKey()
56
+ {
57
+ return key;
58
+ }
59
+
60
+ public Column getColumn()
61
+ {
62
+ return column;
63
+ }
64
+
65
+ public String getJsonPath()
66
+ {
67
+ return jsonPath;
68
+ }
69
+
70
+ public Optional<TimestampParser> getTimestampParser()
71
+ {
72
+ return timestampParser;
73
+ }
74
+ }
75
+
76
+ private class UnchangedColumn
77
+ {
78
+ private final String key;
79
+ private final Column inputColumn;
80
+ private final Column outputColumn;
81
+
82
+ UnchangedColumn(String key, Column inputColumn, Column outputColumn)
83
+ {
84
+ this.key = key;
85
+ this.inputColumn = inputColumn;
86
+ this.outputColumn = outputColumn;
87
+ }
88
+
89
+ public String getKey()
90
+ {
91
+ return key;
92
+ }
93
+
94
+ public Column getInputColumn()
95
+ {
96
+ return inputColumn;
97
+ }
98
+
99
+ public Column getOutputColumn()
100
+ {
101
+ return outputColumn;
102
+ }
103
+ }
104
+
105
+
37
106
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
38
- private final String jsonPathRoot;
39
- private final List<Column> inputColumnsExceptExpandedJsonColumn;
40
- private final List<Column> expandedJsonColumns;
41
- private final HashMap<String, TimestampParser> timestampParserHashMap;
107
+ private final List<UnchangedColumn> unchangedColumns;
108
+ private final List<ExpandedColumn> expandedColumns;
42
109
  private final Column jsonColumn;
43
110
  private final PageReader pageReader;
44
- private final Schema inputSchema;
45
- private final Schema outputSchema;
46
111
  private final PageBuilder pageBuilder;
47
- private final PageOutput pageOutput;
112
+ private final ObjectMapper objectMapper = new ObjectMapper();
113
+ private final ParseContext parseContext;
48
114
 
49
- FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
115
+ private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
116
+ Schema outputSchema)
50
117
  {
51
- this.jsonPathRoot = task.getRoot();
118
+ ImmutableList.Builder<ExpandedColumn> expandedJsonColumnsBuilder = ImmutableList.builder();
119
+ for (Column outputColumn : outputSchema.getColumns()) {
120
+ for (ColumnConfig expandedColumnConfig : task.getExpandedColumns()) {
121
+ if (outputColumn.getName().equals(expandedColumnConfig.getName())) {
52
122
 
53
- ImmutableList.Builder<Column> inputColumnsExceptExpandedJsonColumnBuilder = ImmutableList.builder();
54
- ImmutableList.Builder<Column> expandedJsonColumnsBuilder = ImmutableList.builder();
55
- for (Column column : outputSchema.getColumns()) {
56
- if (inputSchema.getColumns().contains(column)) {
57
- inputColumnsExceptExpandedJsonColumnBuilder.add(column);
123
+ TimestampParser timestampParser = null;
124
+ if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
125
+ String format;
126
+ if (expandedColumnConfig.getOption().has("format")) {
127
+ format = expandedColumnConfig.getOption().get(String.class, "format");
128
+ }
129
+ else {
130
+ format = task.getDefaultTimestampFormat();
131
+ }
132
+ DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
133
+ timestampParser = new TimestampParser(task.getJRuby(), format, timezone);
134
+ }
135
+
136
+ ExpandedColumn expandedColumn = new ExpandedColumn(outputColumn.getName(),
137
+ outputColumn,
138
+ task.getRoot() + outputColumn.getName(),
139
+ Optional.fromNullable(timestampParser));
140
+ expandedJsonColumnsBuilder.add(expandedColumn);
141
+ }
58
142
  }
59
- else {
60
- expandedJsonColumnsBuilder.add(column);
143
+ }
144
+ return expandedJsonColumnsBuilder.build();
145
+ }
146
+
147
+ private List<UnchangedColumn> initializeUnchangedColumns(Schema inputSchema,
148
+ Schema outputSchema,
149
+ Column excludeColumn)
150
+ {
151
+ ImmutableList.Builder<UnchangedColumn> unchangedColumnsBuilder = ImmutableList.builder();
152
+ for (Column outputColumn : outputSchema.getColumns()) {
153
+ for (Column inputColumn : inputSchema.getColumns()) {
154
+ if (inputColumn.getName().equals(outputColumn.getName()) &&
155
+ !excludeColumn.getName().equals(outputColumn.getName())) {
156
+
157
+ UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
158
+ inputColumn,
159
+ outputColumn);
160
+ unchangedColumnsBuilder.add(unchangedColumn);
161
+ }
61
162
  }
62
163
  }
63
- this.inputColumnsExceptExpandedJsonColumn = inputColumnsExceptExpandedJsonColumnBuilder.build();
64
- this.expandedJsonColumns = expandedJsonColumnsBuilder.build();
164
+ return unchangedColumnsBuilder.build();
165
+ }
65
166
 
66
- Column temporaryJsonColumn = null;
167
+ private Column initializeJsonColumn(PluginTask task, Schema inputSchema)
168
+ {
169
+ Column jsonColumn = null;
67
170
  for (Column column: inputSchema.getColumns()) {
68
171
  if (column.getName().contentEquals(task.getJsonColumnName())) {
69
- temporaryJsonColumn = column;
172
+ jsonColumn = column;
70
173
  }
71
174
  }
72
- this.jsonColumn = temporaryJsonColumn;
175
+ return jsonColumn;
176
+ }
177
+
178
+ private ParseContext initializeParseContext()
179
+ {
180
+ Configuration conf = Configuration.defaultConfiguration();
181
+ conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
182
+ conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
183
+ return JsonPath.using(conf);
184
+ }
185
+
186
+ FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
187
+ {
188
+ this.jsonColumn = initializeJsonColumn(task, inputSchema);
189
+ this.unchangedColumns = initializeUnchangedColumns(inputSchema,
190
+ outputSchema,
191
+ jsonColumn);
192
+ this.expandedColumns = initializeExpandedColumns(task,
193
+ outputSchema);
73
194
 
74
- this.timestampParserHashMap = buildTimestampParserHashMap(task);
75
195
  this.pageReader = new PageReader(inputSchema);
76
- this.inputSchema = inputSchema;
77
- this.outputSchema = outputSchema;
78
- this.pageOutput = pageOutput;
79
196
  this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
197
+ this.parseContext = initializeParseContext();
80
198
  }
81
199
 
82
200
  @Override
@@ -86,8 +204,8 @@ public class FilteredPageOutput
86
204
  pageReader.setPage(page);
87
205
 
88
206
  while (pageReader.nextRecord()) {
89
- setInputColumnsExceptExpandedJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
90
- setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserHashMap);
207
+ setExpandedJsonColumns();
208
+ setUnchangedColumns();
91
209
  pageBuilder.addRecord();
92
210
  }
93
211
  }
@@ -101,7 +219,6 @@ public class FilteredPageOutput
101
219
  public void finish()
102
220
  {
103
221
  pageBuilder.finish();
104
- pageOutput.finish();
105
222
  }
106
223
 
107
224
  @Override
@@ -109,116 +226,97 @@ public class FilteredPageOutput
109
226
  {
110
227
  pageReader.close();
111
228
  pageBuilder.close();
112
- pageOutput.close();
113
229
  }
114
230
 
115
- private HashMap<String, TimestampParser> buildTimestampParserHashMap(PluginTask task)
116
- {
117
- final HashMap<String, TimestampParser> timestampParserHashMap = Maps.newHashMap();
118
- for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
119
- if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
120
- String format;
121
- if (expandedColumnConfig.getOption().has("format")) {
122
- format = expandedColumnConfig.getOption().get(String.class, "format");
123
- }
124
- else {
125
- format = task.getDefaultTimestampFormat();
126
- }
127
- DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
128
- TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
129
-
130
- String columnName = expandedColumnConfig.getName();
131
-
132
- timestampParserHashMap.put(columnName, parser);
133
- }
134
- }
135
-
136
- return timestampParserHashMap;
137
- }
138
231
 
139
- private void setInputColumnsExceptExpandedJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
140
- for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
232
+ private void setUnchangedColumns() {
233
+ for (UnchangedColumn unchangedColumn : unchangedColumns) {
234
+ Column inputColumn = unchangedColumn.getInputColumn();
235
+ Column outputColumn = unchangedColumn.getOutputColumn();
236
+
141
237
  if (pageReader.isNull(inputColumn)) {
142
- pageBuilder.setNull(inputColumn);
238
+ pageBuilder.setNull(outputColumn);
143
239
  continue;
144
240
  }
145
241
 
146
- if (Types.STRING.equals(inputColumn.getType())) {
147
- pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
242
+ if (Types.STRING.equals(outputColumn.getType())) {
243
+ pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
148
244
  }
149
- else if (Types.BOOLEAN.equals(inputColumn.getType())) {
150
- pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
245
+ else if (Types.BOOLEAN.equals(outputColumn.getType())) {
246
+ pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
151
247
  }
152
- else if (Types.DOUBLE.equals(inputColumn.getType())) {
153
- pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
248
+ else if (Types.DOUBLE.equals(outputColumn.getType())) {
249
+ pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
154
250
  }
155
- else if (Types.LONG.equals(inputColumn.getType())) {
156
- pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
251
+ else if (Types.LONG.equals(outputColumn.getType())) {
252
+ pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
157
253
  }
158
- else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
159
- pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
254
+ else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
255
+ pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
160
256
  }
161
257
  }
162
258
  }
163
259
 
164
- private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
260
+ private void setExpandedJsonColumns()
165
261
  throws JsonProcessingException
166
262
  {
167
263
  final ReadContext json;
168
- if (pageReader.isNull(originalJsonColumn)) {
264
+ if (pageReader.isNull(jsonColumn)) {
169
265
  json = null;
170
266
  }
171
267
  else {
172
- String jsonObject = pageReader.getString(originalJsonColumn);
173
- Configuration conf = Configuration.defaultConfiguration();
174
- conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
175
- conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
176
- json = JsonPath.using(conf).parse(jsonObject);
268
+ String jsonObject = pageReader.getString(jsonColumn);
269
+ json = parseContext.parse(jsonObject);
177
270
  }
178
271
 
179
- for (Column expandedJsonColumn: expandedJsonColumns) {
272
+ for (ExpandedColumn expandedJsonColumn: expandedColumns) {
180
273
  if (json == null) {
181
- pageBuilder.setNull(expandedJsonColumn);
274
+ pageBuilder.setNull(expandedJsonColumn.getColumn());
182
275
  continue;
183
276
  }
184
277
 
185
- Object value = json.read(jsonPathRoot + expandedJsonColumn.getName());
186
- final String finalValue = writeJsonPathValueAsString(value);
278
+ Object value = json.read(expandedJsonColumn.getJsonPath());
279
+ final String finalValue = convertJsonNodeAsString(value);
187
280
  if (finalValue == null) {
188
- pageBuilder.setNull(expandedJsonColumn);
281
+ pageBuilder.setNull(expandedJsonColumn.getColumn());
189
282
  continue;
190
283
  }
191
284
 
192
- if (Types.STRING.equals(expandedJsonColumn.getType())) {
193
- pageBuilder.setString(expandedJsonColumn, finalValue);
285
+ if (Types.STRING.equals(expandedJsonColumn.getColumn().getType())) {
286
+ pageBuilder.setString(expandedJsonColumn.getColumn(), finalValue);
194
287
  }
195
- else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
196
- pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
288
+ else if (Types.BOOLEAN.equals(expandedJsonColumn.getColumn().getType())) {
289
+ pageBuilder.setBoolean(expandedJsonColumn.getColumn(), Boolean.parseBoolean(finalValue));
197
290
  }
198
- else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
199
- pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
291
+ else if (Types.DOUBLE.equals(expandedJsonColumn.getColumn().getType())) {
292
+ pageBuilder.setDouble(expandedJsonColumn.getColumn(), Double.parseDouble(finalValue));
200
293
  }
201
- else if (Types.LONG.equals(expandedJsonColumn.getType())) {
202
- pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
294
+ else if (Types.LONG.equals(expandedJsonColumn.getColumn().getType())) {
295
+ pageBuilder.setLong(expandedJsonColumn.getColumn(), Long.parseLong(finalValue));
203
296
  }
204
- else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
205
- TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
206
- pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
297
+ else if (Types.TIMESTAMP.equals(expandedJsonColumn.getColumn().getType())) {
298
+ if (expandedJsonColumn.getTimestampParser().isPresent()) {
299
+ TimestampParser parser = expandedJsonColumn.getTimestampParser().get();
300
+ pageBuilder.setTimestamp(expandedJsonColumn.getColumn(), parser.parse(finalValue));
301
+ }
302
+ else {
303
+ throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
304
+ }
207
305
  }
208
306
  }
209
307
  }
210
308
 
211
- private String writeJsonPathValueAsString(Object value)
309
+ private String convertJsonNodeAsString(Object value)
212
310
  throws JsonProcessingException
213
311
  {
214
312
  if (value == null) {
215
313
  return null;
216
314
  }
217
315
  else if (value instanceof List) {
218
- return new ObjectMapper().writeValueAsString(value);
316
+ return objectMapper.writeValueAsString(value);
219
317
  }
220
318
  else if (value instanceof Map) {
221
- return new ObjectMapper().writeValueAsString(value);
319
+ return objectMapper.writeValueAsString(value);
222
320
  }
223
321
  else if (value instanceof String) {
224
322
  return (String) value;
@@ -227,5 +325,4 @@ public class FilteredPageOutput
227
325
  return String.valueOf(value);
228
326
  }
229
327
  }
230
-
231
328
  }
@@ -18,6 +18,7 @@ import org.embulk.spi.PageOutput;
18
18
  import org.embulk.spi.PageReader;
19
19
  import org.embulk.spi.PageTestUtils;
20
20
  import org.embulk.spi.Schema;
21
+ import org.embulk.spi.SchemaConfigException;
21
22
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
22
23
  import org.junit.Before;
23
24
  import org.junit.Rule;
@@ -40,7 +41,9 @@ public class TestExpandJsonFilterPlugin
40
41
 
41
42
  private final Schema schema = Schema.builder()
42
43
  .add("_c0", STRING)
44
+ .add("_c1", STRING)
43
45
  .build();
46
+ private final String c1Data = "_c1_data";
44
47
  private ExpandJsonFilterPlugin expandJsonFilterPlugin;
45
48
 
46
49
  @Before
@@ -89,6 +92,26 @@ public class TestExpandJsonFilterPlugin
89
92
  config.loadConfig(PluginTask.class);
90
93
  }
91
94
 
95
+ @Test
96
+ public void testThrowExceptionInvalidJsonColumnName()
97
+ {
98
+ String configYaml = "" +
99
+ "type: expand_json\n" +
100
+ "json_column_name: not_exist\n" +
101
+ "expanded_columns:\n" +
102
+ " - {name: _c1, type: string}";
103
+ ConfigSource config = getConfigFromYaml(configYaml);
104
+
105
+ exception.expect(SchemaConfigException.class);
106
+ expandJsonFilterPlugin.transaction(config, schema, new Control() {
107
+ @Override
108
+ public void run(TaskSource taskSource, Schema schema)
109
+ {
110
+ // do nothing
111
+ }
112
+ });
113
+ }
114
+
92
115
  @Test
93
116
  public void testThrowExceptionAbsentExpandedColumns()
94
117
  {
@@ -139,7 +162,8 @@ public class TestExpandJsonFilterPlugin
139
162
  " - {name: _j2, type: long}\n" +
140
163
  " - {name: _j3, type: timestamp}\n" +
141
164
  " - {name: _j4, type: double}\n" +
142
- " - {name: _j5, type: string}\n";
165
+ " - {name: _j5, type: string}\n" +
166
+ " - {name: _c0, type: string}\n";
143
167
 
144
168
  ConfigSource config = getConfigFromYaml(configYaml);
145
169
 
@@ -148,13 +172,15 @@ public class TestExpandJsonFilterPlugin
148
172
  @Override
149
173
  public void run(TaskSource taskSource, Schema outputSchema)
150
174
  {
151
- assertEquals(5, outputSchema.getColumnCount());
175
+ assertEquals(7, outputSchema.getColumnCount());
152
176
 
153
177
  Column new_j1 = outputSchema.getColumn(0);
154
178
  Column new_j2 = outputSchema.getColumn(1);
155
179
  Column new_j3 = outputSchema.getColumn(2);
156
180
  Column new_j4 = outputSchema.getColumn(3);
157
181
  Column new_j5 = outputSchema.getColumn(4);
182
+ Column new_c0 = outputSchema.getColumn(5);
183
+ Column old_c1 = outputSchema.getColumn(6);
158
184
 
159
185
  assertEquals("_j1", new_j1.getName());
160
186
  assertEquals(BOOLEAN, new_j1.getType());
@@ -166,6 +192,11 @@ public class TestExpandJsonFilterPlugin
166
192
  assertEquals(DOUBLE, new_j4.getType());
167
193
  assertEquals("_j5", new_j5.getName());
168
194
  assertEquals(STRING, new_j5.getType());
195
+ assertEquals("_c0", new_c0.getName());
196
+ assertEquals(STRING, new_c0.getType());
197
+ assertEquals("_c1", old_c1.getName());
198
+ assertEquals(STRING, old_c1.getType());
199
+
169
200
  }
170
201
  });
171
202
  }
@@ -191,7 +222,8 @@ public class TestExpandJsonFilterPlugin
191
222
  " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
192
223
  " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
193
224
  " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
194
- " - {name: '_j7.store.book[2].author', type: string}\n";
225
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
226
+ " - {name: _c0, type: string}\n";
195
227
 
196
228
  ConfigSource config = getConfigFromYaml(configYaml);
197
229
 
@@ -292,12 +324,13 @@ public class TestExpandJsonFilterPlugin
292
324
  }
293
325
  */
294
326
  );
327
+ builder.put("_c0", "v12");
295
328
 
296
329
  String data = convertToJsonString(builder.build());
297
330
 
298
331
  for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
299
332
  schema,
300
- data)) {
333
+ data, c1Data)) {
301
334
  pageOutput.add(page);
302
335
  }
303
336
 
@@ -323,7 +356,8 @@ public class TestExpandJsonFilterPlugin
323
356
  pageReader.getString(outputSchema.getColumn(7)));
324
357
  assertEquals("[\"Nigel Rees\",\"Herman Melville\"]",
325
358
  pageReader.getString(outputSchema.getColumn(8)));
326
- assertEquals("[" +
359
+ assertEquals("" +
360
+ "[" +
327
361
  "{" +
328
362
  "\"author\":\"Herman Melville\"," +
329
363
  "\"title\":\"Moby Dick\"," +
@@ -342,6 +376,10 @@ public class TestExpandJsonFilterPlugin
342
376
  pageReader.getString(outputSchema.getColumn(10)));
343
377
  assertEquals("Herman Melville",
344
378
  pageReader.getString(outputSchema.getColumn(11)));
379
+ assertEquals("v12",
380
+ pageReader.getString(outputSchema.getColumn(12)));
381
+ assertEquals(c1Data,
382
+ pageReader.getString(outputSchema.getColumn(13)));
345
383
  }
346
384
  }
347
385
  });
@@ -373,7 +411,7 @@ public class TestExpandJsonFilterPlugin
373
411
  String data = getBrokenJsonString();
374
412
  for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
375
413
  schema,
376
- data)) {
414
+ data, c1Data)) {
377
415
  exception.expect(InvalidJsonException.class);
378
416
  exception.expectMessage("Unexpected End Of File position 12: null");
379
417
  pageOutput.add(page);
@@ -391,5 +429,4 @@ public class TestExpandJsonFilterPlugin
391
429
  }
392
430
  });
393
431
  }
394
-
395
432
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-16 00:00:00.000000000 Z
11
+ date: 2016-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -47,9 +47,12 @@ extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
49
  - .travis.yml
50
+ - CHANGELOG.md
50
51
  - LICENSE.txt
51
52
  - README.md
52
53
  - build.gradle
54
+ - config/checkstyle/checkstyle.xml
55
+ - config/checkstyle/default.xml
53
56
  - example/config.yml
54
57
  - example/data.tsv
55
58
  - gradle/wrapper/gradle-wrapper.jar
@@ -60,11 +63,12 @@ files:
60
63
  - src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
61
64
  - src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
62
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
63
- - classpath/asm-1.0.2.jar
64
- - classpath/asm-3.3.1.jar
65
- - classpath/embulk-filter-expand_json-0.0.3.jar
66
- - classpath/json-path-2.0.0.jar
67
- - classpath/json-smart-2.1.1.jar
66
+ - classpath/accessors-smart-1.1.jar
67
+ - classpath/asm-5.0.3.jar
68
+ - classpath/embulk-filter-expand_json-0.0.5.jar
69
+ - classpath/json-path-2.2.0.jar
70
+ - classpath/json-smart-2.2.1.jar
71
+ - classpath/slf4j-api-1.7.16.jar
68
72
  homepage: https://github.com/civitaspo/embulk-filter-expand_json
69
73
  licenses:
70
74
  - MIT