embulk-filter-expand_json 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7c9576abd91635ad11888a88fba5f14e576383e
4
- data.tar.gz: 6a80bd358b9056620075fc16f1f7ae59a5a911c9
3
+ metadata.gz: 07943e1a2bf6a447bccb93cbd9ec0412f00382f0
4
+ data.tar.gz: 0051fdf7de1702ddd169afb6d9a4fe9ce53e0ded
5
5
  SHA512:
6
- metadata.gz: 63ae1b2d8c541497ba9bc0c892a403b30c32e6ac8222eb6bc70d09675c0fc9ec2e26a6b5fbd9443e7d5e2ee479b6bbaa8efaf8d8e1f4995b839f9fd1d4600089
7
- data.tar.gz: 73e53f220990bfe91249eceef14450039c29461a20821963e281d99462fb76fdf6583784f4270301c0a46f6b805259143dd1473d787f21a9c18306718885f6d8
6
+ metadata.gz: 860428fccd8b863e1070ef0e23f657c88e5312031aa222588e51b20c7caa394b2425f26dfc3322fdf69dbc0d3984ff524e3969666cb9a53c68672ead92fd8c25
7
+ data.tar.gz: d862d2cd98f348242f78a04d5cc2d4117cd4d3923428e181a40b778b509aaf37630abe97158a0f1a8d9351bffcd990aa8675ee5076f522575d86972c8f4bf6d2
@@ -0,0 +1,6 @@
1
+ 0.0.5 (2016-03-04)
2
+ ==================
3
+ - [Fix] Avoid `NullPointerException` if a column specified as `json_column_name` option doesn't exist in input schema.
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/6 from @muga
5
+ - [Fix] Migrate for Embulk v0.8.x
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/7
data/README.md CHANGED
@@ -63,3 +63,7 @@ filters:
63
63
  ```
64
64
  $ ./gradlew gem # -t to watch change of files and rebuild continuously
65
65
  ```
66
+
67
+ ## Contributor
68
+ - @Civitaspo
69
+ - @muga
@@ -4,6 +4,7 @@ plugins {
4
4
  id "com.github.kt3k.coveralls" version "2.4.0"
5
5
  id "jacoco"
6
6
  id "java"
7
+ id "checkstyle"
7
8
  }
8
9
  import com.github.jrubygradle.JRubyExec
9
10
  repositories {
@@ -14,16 +15,16 @@ configurations {
14
15
  provided
15
16
  }
16
17
 
17
- version = "0.0.3"
18
+ version = "0.0.5"
18
19
  sourceCompatibility = 1.7
19
20
  targetCompatibility = 1.7
20
21
 
21
22
  dependencies {
22
- compile "org.embulk:embulk-core:0.7.+"
23
- provided "org.embulk:embulk-core:0.7.+"
23
+ compile "org.embulk:embulk-core:0.8.6"
24
+ provided "org.embulk:embulk-core:0.8.6"
24
25
  compile "com.jayway.jsonpath:json-path:2.+"
25
26
  testCompile "junit:junit:4.+"
26
- testCompile "org.embulk:embulk-core:0.7.+:tests"
27
+ testCompile "org.embulk:embulk-core:0.8.6:tests"
27
28
  }
28
29
 
29
30
  jacocoTestReport {
@@ -40,6 +41,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
40
41
  }
41
42
  clean { delete "classpath" }
42
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
60
+
43
61
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
44
62
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
45
63
  script "${project.name}.gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -46,6 +46,10 @@ public class ExpandJsonFilterPlugin
46
46
  FilterPlugin.Control control)
47
47
  {
48
48
  PluginTask task = config.loadConfig(PluginTask.class);
49
+
50
+ // check if the specified json column exists or not
51
+ inputSchema.lookupColumn(task.getJsonColumnName());
52
+
49
53
  Schema outputSchema = buildOutputSchema(task, inputSchema);
50
54
  control.run(task.dump(), outputSchema);
51
55
  }
@@ -65,14 +69,16 @@ public class ExpandJsonFilterPlugin
65
69
  int i = 0; // columns index
66
70
  for (Column inputColumn: inputSchema.getColumns()) {
67
71
  if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
68
- logger.info("removed column: name: {}, type: {}",
72
+ logger.info("removed column: name: {}, type: {}, index: {}",
69
73
  inputColumn.getName(),
70
- inputColumn.getType());
74
+ inputColumn.getType(),
75
+ inputColumn.getIndex());
71
76
  for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
72
- logger.info("added column: name: {}, type: {}, options: {}",
77
+ logger.info("added column: name: {}, type: {}, options: {}, index: {}",
73
78
  expandedColumnConfig.getName(),
74
79
  expandedColumnConfig.getType(),
75
- expandedColumnConfig.getOption());
80
+ expandedColumnConfig.getOption(),
81
+ i);
76
82
  Column outputColumn = new Column(i++,
77
83
  expandedColumnConfig.getName(),
78
84
  expandedColumnConfig.getType());
@@ -80,6 +86,10 @@ public class ExpandJsonFilterPlugin
80
86
  }
81
87
  }
82
88
  else {
89
+ logger.info("unchanged column: name: {}, type: {}, index: {}",
90
+ inputColumn.getName(),
91
+ inputColumn.getType(),
92
+ i);
83
93
  Column outputColumn = new Column(i++,
84
94
  inputColumn.getName(),
85
95
  inputColumn.getType());
@@ -2,12 +2,13 @@ package org.embulk.filter.expand_json;
2
2
 
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.ObjectMapper;
5
+ import com.google.common.base.Optional;
5
6
  import com.google.common.base.Throwables;
6
7
  import com.google.common.collect.ImmutableList;
7
- import com.google.common.collect.Maps;
8
8
  import com.jayway.jsonpath.Configuration;
9
9
  import com.jayway.jsonpath.JsonPath;
10
10
  import com.jayway.jsonpath.Option;
11
+ import com.jayway.jsonpath.ParseContext;
11
12
  import com.jayway.jsonpath.ReadContext;
12
13
  import org.embulk.spi.Column;
13
14
  import org.embulk.spi.ColumnConfig;
@@ -22,7 +23,6 @@ import org.embulk.spi.type.Types;
22
23
  import org.joda.time.DateTimeZone;
23
24
  import org.slf4j.Logger;
24
25
 
25
- import java.util.HashMap;
26
26
  import java.util.List;
27
27
  import java.util.Map;
28
28
 
@@ -34,49 +34,167 @@ import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
34
34
  public class FilteredPageOutput
35
35
  implements PageOutput
36
36
  {
37
+ private class ExpandedColumn
38
+ {
39
+ private final String key;
40
+ private final Column column;
41
+ private final String jsonPath;
42
+ private final Optional<TimestampParser> timestampParser;
43
+
44
+ ExpandedColumn(String key,
45
+ Column column,
46
+ String jsonPath,
47
+ Optional<TimestampParser> timestampParser)
48
+ {
49
+ this.key = key;
50
+ this.column = column;
51
+ this.jsonPath = jsonPath;
52
+ this.timestampParser = timestampParser;
53
+ }
54
+
55
+ public String getKey()
56
+ {
57
+ return key;
58
+ }
59
+
60
+ public Column getColumn()
61
+ {
62
+ return column;
63
+ }
64
+
65
+ public String getJsonPath()
66
+ {
67
+ return jsonPath;
68
+ }
69
+
70
+ public Optional<TimestampParser> getTimestampParser()
71
+ {
72
+ return timestampParser;
73
+ }
74
+ }
75
+
76
+ private class UnchangedColumn
77
+ {
78
+ private final String key;
79
+ private final Column inputColumn;
80
+ private final Column outputColumn;
81
+
82
+ UnchangedColumn(String key, Column inputColumn, Column outputColumn)
83
+ {
84
+ this.key = key;
85
+ this.inputColumn = inputColumn;
86
+ this.outputColumn = outputColumn;
87
+ }
88
+
89
+ public String getKey()
90
+ {
91
+ return key;
92
+ }
93
+
94
+ public Column getInputColumn()
95
+ {
96
+ return inputColumn;
97
+ }
98
+
99
+ public Column getOutputColumn()
100
+ {
101
+ return outputColumn;
102
+ }
103
+ }
104
+
105
+
37
106
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
38
- private final String jsonPathRoot;
39
- private final List<Column> inputColumnsExceptExpandedJsonColumn;
40
- private final List<Column> expandedJsonColumns;
41
- private final HashMap<String, TimestampParser> timestampParserHashMap;
107
+ private final List<UnchangedColumn> unchangedColumns;
108
+ private final List<ExpandedColumn> expandedColumns;
42
109
  private final Column jsonColumn;
43
110
  private final PageReader pageReader;
44
- private final Schema inputSchema;
45
- private final Schema outputSchema;
46
111
  private final PageBuilder pageBuilder;
47
- private final PageOutput pageOutput;
112
+ private final ObjectMapper objectMapper = new ObjectMapper();
113
+ private final ParseContext parseContext;
48
114
 
49
- FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
115
+ private List<ExpandedColumn> initializeExpandedColumns(PluginTask task,
116
+ Schema outputSchema)
50
117
  {
51
- this.jsonPathRoot = task.getRoot();
118
+ ImmutableList.Builder<ExpandedColumn> expandedJsonColumnsBuilder = ImmutableList.builder();
119
+ for (Column outputColumn : outputSchema.getColumns()) {
120
+ for (ColumnConfig expandedColumnConfig : task.getExpandedColumns()) {
121
+ if (outputColumn.getName().equals(expandedColumnConfig.getName())) {
52
122
 
53
- ImmutableList.Builder<Column> inputColumnsExceptExpandedJsonColumnBuilder = ImmutableList.builder();
54
- ImmutableList.Builder<Column> expandedJsonColumnsBuilder = ImmutableList.builder();
55
- for (Column column : outputSchema.getColumns()) {
56
- if (inputSchema.getColumns().contains(column)) {
57
- inputColumnsExceptExpandedJsonColumnBuilder.add(column);
123
+ TimestampParser timestampParser = null;
124
+ if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
125
+ String format;
126
+ if (expandedColumnConfig.getOption().has("format")) {
127
+ format = expandedColumnConfig.getOption().get(String.class, "format");
128
+ }
129
+ else {
130
+ format = task.getDefaultTimestampFormat();
131
+ }
132
+ DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
133
+ timestampParser = new TimestampParser(task.getJRuby(), format, timezone);
134
+ }
135
+
136
+ ExpandedColumn expandedColumn = new ExpandedColumn(outputColumn.getName(),
137
+ outputColumn,
138
+ task.getRoot() + outputColumn.getName(),
139
+ Optional.fromNullable(timestampParser));
140
+ expandedJsonColumnsBuilder.add(expandedColumn);
141
+ }
58
142
  }
59
- else {
60
- expandedJsonColumnsBuilder.add(column);
143
+ }
144
+ return expandedJsonColumnsBuilder.build();
145
+ }
146
+
147
+ private List<UnchangedColumn> initializeUnchangedColumns(Schema inputSchema,
148
+ Schema outputSchema,
149
+ Column excludeColumn)
150
+ {
151
+ ImmutableList.Builder<UnchangedColumn> unchangedColumnsBuilder = ImmutableList.builder();
152
+ for (Column outputColumn : outputSchema.getColumns()) {
153
+ for (Column inputColumn : inputSchema.getColumns()) {
154
+ if (inputColumn.getName().equals(outputColumn.getName()) &&
155
+ !excludeColumn.getName().equals(outputColumn.getName())) {
156
+
157
+ UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
158
+ inputColumn,
159
+ outputColumn);
160
+ unchangedColumnsBuilder.add(unchangedColumn);
161
+ }
61
162
  }
62
163
  }
63
- this.inputColumnsExceptExpandedJsonColumn = inputColumnsExceptExpandedJsonColumnBuilder.build();
64
- this.expandedJsonColumns = expandedJsonColumnsBuilder.build();
164
+ return unchangedColumnsBuilder.build();
165
+ }
65
166
 
66
- Column temporaryJsonColumn = null;
167
+ private Column initializeJsonColumn(PluginTask task, Schema inputSchema)
168
+ {
169
+ Column jsonColumn = null;
67
170
  for (Column column: inputSchema.getColumns()) {
68
171
  if (column.getName().contentEquals(task.getJsonColumnName())) {
69
- temporaryJsonColumn = column;
172
+ jsonColumn = column;
70
173
  }
71
174
  }
72
- this.jsonColumn = temporaryJsonColumn;
175
+ return jsonColumn;
176
+ }
177
+
178
+ private ParseContext initializeParseContext()
179
+ {
180
+ Configuration conf = Configuration.defaultConfiguration();
181
+ conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
182
+ conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
183
+ return JsonPath.using(conf);
184
+ }
185
+
186
+ FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
187
+ {
188
+ this.jsonColumn = initializeJsonColumn(task, inputSchema);
189
+ this.unchangedColumns = initializeUnchangedColumns(inputSchema,
190
+ outputSchema,
191
+ jsonColumn);
192
+ this.expandedColumns = initializeExpandedColumns(task,
193
+ outputSchema);
73
194
 
74
- this.timestampParserHashMap = buildTimestampParserHashMap(task);
75
195
  this.pageReader = new PageReader(inputSchema);
76
- this.inputSchema = inputSchema;
77
- this.outputSchema = outputSchema;
78
- this.pageOutput = pageOutput;
79
196
  this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
197
+ this.parseContext = initializeParseContext();
80
198
  }
81
199
 
82
200
  @Override
@@ -86,8 +204,8 @@ public class FilteredPageOutput
86
204
  pageReader.setPage(page);
87
205
 
88
206
  while (pageReader.nextRecord()) {
89
- setInputColumnsExceptExpandedJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
90
- setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserHashMap);
207
+ setExpandedJsonColumns();
208
+ setUnchangedColumns();
91
209
  pageBuilder.addRecord();
92
210
  }
93
211
  }
@@ -101,7 +219,6 @@ public class FilteredPageOutput
101
219
  public void finish()
102
220
  {
103
221
  pageBuilder.finish();
104
- pageOutput.finish();
105
222
  }
106
223
 
107
224
  @Override
@@ -109,116 +226,97 @@ public class FilteredPageOutput
109
226
  {
110
227
  pageReader.close();
111
228
  pageBuilder.close();
112
- pageOutput.close();
113
229
  }
114
230
 
115
- private HashMap<String, TimestampParser> buildTimestampParserHashMap(PluginTask task)
116
- {
117
- final HashMap<String, TimestampParser> timestampParserHashMap = Maps.newHashMap();
118
- for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
119
- if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
120
- String format;
121
- if (expandedColumnConfig.getOption().has("format")) {
122
- format = expandedColumnConfig.getOption().get(String.class, "format");
123
- }
124
- else {
125
- format = task.getDefaultTimestampFormat();
126
- }
127
- DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
128
- TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
129
-
130
- String columnName = expandedColumnConfig.getName();
131
-
132
- timestampParserHashMap.put(columnName, parser);
133
- }
134
- }
135
-
136
- return timestampParserHashMap;
137
- }
138
231
 
139
- private void setInputColumnsExceptExpandedJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
140
- for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
232
+ private void setUnchangedColumns() {
233
+ for (UnchangedColumn unchangedColumn : unchangedColumns) {
234
+ Column inputColumn = unchangedColumn.getInputColumn();
235
+ Column outputColumn = unchangedColumn.getOutputColumn();
236
+
141
237
  if (pageReader.isNull(inputColumn)) {
142
- pageBuilder.setNull(inputColumn);
238
+ pageBuilder.setNull(outputColumn);
143
239
  continue;
144
240
  }
145
241
 
146
- if (Types.STRING.equals(inputColumn.getType())) {
147
- pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
242
+ if (Types.STRING.equals(outputColumn.getType())) {
243
+ pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
148
244
  }
149
- else if (Types.BOOLEAN.equals(inputColumn.getType())) {
150
- pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
245
+ else if (Types.BOOLEAN.equals(outputColumn.getType())) {
246
+ pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
151
247
  }
152
- else if (Types.DOUBLE.equals(inputColumn.getType())) {
153
- pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
248
+ else if (Types.DOUBLE.equals(outputColumn.getType())) {
249
+ pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
154
250
  }
155
- else if (Types.LONG.equals(inputColumn.getType())) {
156
- pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
251
+ else if (Types.LONG.equals(outputColumn.getType())) {
252
+ pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
157
253
  }
158
- else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
159
- pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
254
+ else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
255
+ pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
160
256
  }
161
257
  }
162
258
  }
163
259
 
164
- private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
260
+ private void setExpandedJsonColumns()
165
261
  throws JsonProcessingException
166
262
  {
167
263
  final ReadContext json;
168
- if (pageReader.isNull(originalJsonColumn)) {
264
+ if (pageReader.isNull(jsonColumn)) {
169
265
  json = null;
170
266
  }
171
267
  else {
172
- String jsonObject = pageReader.getString(originalJsonColumn);
173
- Configuration conf = Configuration.defaultConfiguration();
174
- conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
175
- conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
176
- json = JsonPath.using(conf).parse(jsonObject);
268
+ String jsonObject = pageReader.getString(jsonColumn);
269
+ json = parseContext.parse(jsonObject);
177
270
  }
178
271
 
179
- for (Column expandedJsonColumn: expandedJsonColumns) {
272
+ for (ExpandedColumn expandedJsonColumn: expandedColumns) {
180
273
  if (json == null) {
181
- pageBuilder.setNull(expandedJsonColumn);
274
+ pageBuilder.setNull(expandedJsonColumn.getColumn());
182
275
  continue;
183
276
  }
184
277
 
185
- Object value = json.read(jsonPathRoot + expandedJsonColumn.getName());
186
- final String finalValue = writeJsonPathValueAsString(value);
278
+ Object value = json.read(expandedJsonColumn.getJsonPath());
279
+ final String finalValue = convertJsonNodeAsString(value);
187
280
  if (finalValue == null) {
188
- pageBuilder.setNull(expandedJsonColumn);
281
+ pageBuilder.setNull(expandedJsonColumn.getColumn());
189
282
  continue;
190
283
  }
191
284
 
192
- if (Types.STRING.equals(expandedJsonColumn.getType())) {
193
- pageBuilder.setString(expandedJsonColumn, finalValue);
285
+ if (Types.STRING.equals(expandedJsonColumn.getColumn().getType())) {
286
+ pageBuilder.setString(expandedJsonColumn.getColumn(), finalValue);
194
287
  }
195
- else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
196
- pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
288
+ else if (Types.BOOLEAN.equals(expandedJsonColumn.getColumn().getType())) {
289
+ pageBuilder.setBoolean(expandedJsonColumn.getColumn(), Boolean.parseBoolean(finalValue));
197
290
  }
198
- else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
199
- pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
291
+ else if (Types.DOUBLE.equals(expandedJsonColumn.getColumn().getType())) {
292
+ pageBuilder.setDouble(expandedJsonColumn.getColumn(), Double.parseDouble(finalValue));
200
293
  }
201
- else if (Types.LONG.equals(expandedJsonColumn.getType())) {
202
- pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
294
+ else if (Types.LONG.equals(expandedJsonColumn.getColumn().getType())) {
295
+ pageBuilder.setLong(expandedJsonColumn.getColumn(), Long.parseLong(finalValue));
203
296
  }
204
- else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
205
- TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
206
- pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
297
+ else if (Types.TIMESTAMP.equals(expandedJsonColumn.getColumn().getType())) {
298
+ if (expandedJsonColumn.getTimestampParser().isPresent()) {
299
+ TimestampParser parser = expandedJsonColumn.getTimestampParser().get();
300
+ pageBuilder.setTimestamp(expandedJsonColumn.getColumn(), parser.parse(finalValue));
301
+ }
302
+ else {
303
+ throw new RuntimeException("TimestampParser is absent for column:" + expandedJsonColumn.getKey());
304
+ }
207
305
  }
208
306
  }
209
307
  }
210
308
 
211
- private String writeJsonPathValueAsString(Object value)
309
+ private String convertJsonNodeAsString(Object value)
212
310
  throws JsonProcessingException
213
311
  {
214
312
  if (value == null) {
215
313
  return null;
216
314
  }
217
315
  else if (value instanceof List) {
218
- return new ObjectMapper().writeValueAsString(value);
316
+ return objectMapper.writeValueAsString(value);
219
317
  }
220
318
  else if (value instanceof Map) {
221
- return new ObjectMapper().writeValueAsString(value);
319
+ return objectMapper.writeValueAsString(value);
222
320
  }
223
321
  else if (value instanceof String) {
224
322
  return (String) value;
@@ -227,5 +325,4 @@ public class FilteredPageOutput
227
325
  return String.valueOf(value);
228
326
  }
229
327
  }
230
-
231
328
  }
@@ -18,6 +18,7 @@ import org.embulk.spi.PageOutput;
18
18
  import org.embulk.spi.PageReader;
19
19
  import org.embulk.spi.PageTestUtils;
20
20
  import org.embulk.spi.Schema;
21
+ import org.embulk.spi.SchemaConfigException;
21
22
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
22
23
  import org.junit.Before;
23
24
  import org.junit.Rule;
@@ -40,7 +41,9 @@ public class TestExpandJsonFilterPlugin
40
41
 
41
42
  private final Schema schema = Schema.builder()
42
43
  .add("_c0", STRING)
44
+ .add("_c1", STRING)
43
45
  .build();
46
+ private final String c1Data = "_c1_data";
44
47
  private ExpandJsonFilterPlugin expandJsonFilterPlugin;
45
48
 
46
49
  @Before
@@ -89,6 +92,26 @@ public class TestExpandJsonFilterPlugin
89
92
  config.loadConfig(PluginTask.class);
90
93
  }
91
94
 
95
+ @Test
96
+ public void testThrowExceptionInvalidJsonColumnName()
97
+ {
98
+ String configYaml = "" +
99
+ "type: expand_json\n" +
100
+ "json_column_name: not_exist\n" +
101
+ "expanded_columns:\n" +
102
+ " - {name: _c1, type: string}";
103
+ ConfigSource config = getConfigFromYaml(configYaml);
104
+
105
+ exception.expect(SchemaConfigException.class);
106
+ expandJsonFilterPlugin.transaction(config, schema, new Control() {
107
+ @Override
108
+ public void run(TaskSource taskSource, Schema schema)
109
+ {
110
+ // do nothing
111
+ }
112
+ });
113
+ }
114
+
92
115
  @Test
93
116
  public void testThrowExceptionAbsentExpandedColumns()
94
117
  {
@@ -139,7 +162,8 @@ public class TestExpandJsonFilterPlugin
139
162
  " - {name: _j2, type: long}\n" +
140
163
  " - {name: _j3, type: timestamp}\n" +
141
164
  " - {name: _j4, type: double}\n" +
142
- " - {name: _j5, type: string}\n";
165
+ " - {name: _j5, type: string}\n" +
166
+ " - {name: _c0, type: string}\n";
143
167
 
144
168
  ConfigSource config = getConfigFromYaml(configYaml);
145
169
 
@@ -148,13 +172,15 @@ public class TestExpandJsonFilterPlugin
148
172
  @Override
149
173
  public void run(TaskSource taskSource, Schema outputSchema)
150
174
  {
151
- assertEquals(5, outputSchema.getColumnCount());
175
+ assertEquals(7, outputSchema.getColumnCount());
152
176
 
153
177
  Column new_j1 = outputSchema.getColumn(0);
154
178
  Column new_j2 = outputSchema.getColumn(1);
155
179
  Column new_j3 = outputSchema.getColumn(2);
156
180
  Column new_j4 = outputSchema.getColumn(3);
157
181
  Column new_j5 = outputSchema.getColumn(4);
182
+ Column new_c0 = outputSchema.getColumn(5);
183
+ Column old_c1 = outputSchema.getColumn(6);
158
184
 
159
185
  assertEquals("_j1", new_j1.getName());
160
186
  assertEquals(BOOLEAN, new_j1.getType());
@@ -166,6 +192,11 @@ public class TestExpandJsonFilterPlugin
166
192
  assertEquals(DOUBLE, new_j4.getType());
167
193
  assertEquals("_j5", new_j5.getName());
168
194
  assertEquals(STRING, new_j5.getType());
195
+ assertEquals("_c0", new_c0.getName());
196
+ assertEquals(STRING, new_c0.getType());
197
+ assertEquals("_c1", old_c1.getName());
198
+ assertEquals(STRING, old_c1.getType());
199
+
169
200
  }
170
201
  });
171
202
  }
@@ -191,7 +222,8 @@ public class TestExpandJsonFilterPlugin
191
222
  " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
192
223
  " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
193
224
  " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
194
- " - {name: '_j7.store.book[2].author', type: string}\n";
225
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
226
+ " - {name: _c0, type: string}\n";
195
227
 
196
228
  ConfigSource config = getConfigFromYaml(configYaml);
197
229
 
@@ -292,12 +324,13 @@ public class TestExpandJsonFilterPlugin
292
324
  }
293
325
  */
294
326
  );
327
+ builder.put("_c0", "v12");
295
328
 
296
329
  String data = convertToJsonString(builder.build());
297
330
 
298
331
  for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
299
332
  schema,
300
- data)) {
333
+ data, c1Data)) {
301
334
  pageOutput.add(page);
302
335
  }
303
336
 
@@ -323,7 +356,8 @@ public class TestExpandJsonFilterPlugin
323
356
  pageReader.getString(outputSchema.getColumn(7)));
324
357
  assertEquals("[\"Nigel Rees\",\"Herman Melville\"]",
325
358
  pageReader.getString(outputSchema.getColumn(8)));
326
- assertEquals("[" +
359
+ assertEquals("" +
360
+ "[" +
327
361
  "{" +
328
362
  "\"author\":\"Herman Melville\"," +
329
363
  "\"title\":\"Moby Dick\"," +
@@ -342,6 +376,10 @@ public class TestExpandJsonFilterPlugin
342
376
  pageReader.getString(outputSchema.getColumn(10)));
343
377
  assertEquals("Herman Melville",
344
378
  pageReader.getString(outputSchema.getColumn(11)));
379
+ assertEquals("v12",
380
+ pageReader.getString(outputSchema.getColumn(12)));
381
+ assertEquals(c1Data,
382
+ pageReader.getString(outputSchema.getColumn(13)));
345
383
  }
346
384
  }
347
385
  });
@@ -373,7 +411,7 @@ public class TestExpandJsonFilterPlugin
373
411
  String data = getBrokenJsonString();
374
412
  for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
375
413
  schema,
376
- data)) {
414
+ data, c1Data)) {
377
415
  exception.expect(InvalidJsonException.class);
378
416
  exception.expectMessage("Unexpected End Of File position 12: null");
379
417
  pageOutput.add(page);
@@ -391,5 +429,4 @@ public class TestExpandJsonFilterPlugin
391
429
  }
392
430
  });
393
431
  }
394
-
395
432
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-16 00:00:00.000000000 Z
11
+ date: 2016-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -47,9 +47,12 @@ extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
49
  - .travis.yml
50
+ - CHANGELOG.md
50
51
  - LICENSE.txt
51
52
  - README.md
52
53
  - build.gradle
54
+ - config/checkstyle/checkstyle.xml
55
+ - config/checkstyle/default.xml
53
56
  - example/config.yml
54
57
  - example/data.tsv
55
58
  - gradle/wrapper/gradle-wrapper.jar
@@ -60,11 +63,12 @@ files:
60
63
  - src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
61
64
  - src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
62
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
63
- - classpath/asm-1.0.2.jar
64
- - classpath/asm-3.3.1.jar
65
- - classpath/embulk-filter-expand_json-0.0.3.jar
66
- - classpath/json-path-2.0.0.jar
67
- - classpath/json-smart-2.1.1.jar
66
+ - classpath/accessors-smart-1.1.jar
67
+ - classpath/asm-5.0.3.jar
68
+ - classpath/embulk-filter-expand_json-0.0.5.jar
69
+ - classpath/json-path-2.2.0.jar
70
+ - classpath/json-smart-2.2.1.jar
71
+ - classpath/slf4j-api-1.7.16.jar
68
72
  homepage: https://github.com/civitaspo/embulk-filter-expand_json
69
73
  licenses:
70
74
  - MIT