embulk-output-parquet 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 887eeba43ad66ae6159048504e253542162a8988
4
- data.tar.gz: a04dba91a7abeecc957d7265f809c9bf1276ae0e
3
+ metadata.gz: d1ef34fa1ab2ea085e926b70700d4bff09e7bb16
4
+ data.tar.gz: d2d9df28c5ed603995193552104466da98d400eb
5
5
  SHA512:
6
- metadata.gz: 56ec1d8ad587e73a97ad769bbaad04793bb28ccf9119c638cde85e6dd38e06626fe8343ecfa9cc2e497cec78724366c2f3d8fdd7656b892c44ad32d9c4d65718
7
- data.tar.gz: a99aa07e1b1507c6d375c4d2a21fa6d99341bf72f4022059eda90c88900f7597f687be5baed3c26728187212f59028e8679906fc8f6bba02795e1927f5e1ddcd
6
+ metadata.gz: 6a48e6ac6438c1cd56bf431b69ea8e980a54bff54290299d2da7a733c8defe4746230ba7afd2c5446b2d2ce8d42aa7cf74b6ca95c6dd8473cfefa278b891813f
7
+ data.tar.gz: adcfe86af5337ab4f41b2eb78ae84a2e20f71d40ad63652029d83cb50368ba40139f59f9f38a1bd8a7e7425c6838f96e39f733bbda6230ca9e37c1ae17b09f80
data/README.md CHANGED
@@ -24,14 +24,21 @@ http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
24
24
 
25
25
  ## Example
26
26
 
27
+ ```yaml
28
+ out:
29
+ type: parquet
30
+ path_prefix: file:///data/output
31
+ ```
32
+
33
+ ### How to write parquet files into S3
34
+
27
35
  ```yaml
28
36
  out:
29
37
  type: parquet
30
38
  path_prefix: s3a://bucket/keys
31
- extra_configuration:
39
+ extra_configurations:
32
40
  fs.s3a.access.key: 'your_access_key'
33
41
  fs.s3a.secret.key: 'your_secret_access_key'
34
-
35
42
  ```
36
43
 
37
44
  ## Build
data/build.gradle CHANGED
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  }
6
7
  import com.github.jrubygradle.JRubyExec
7
8
  repositories {
@@ -13,7 +14,11 @@ configurations {
13
14
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
14
15
  }
15
16
 
16
- version = "0.4.0"
17
+ version = "0.5.0"
18
+
19
+ sourceCompatibility = 1.7
20
+
21
+ targetCompatibility = 1.7
17
22
 
18
23
  dependencies {
19
24
  compile "org.embulk:embulk-core:0.7.10"
@@ -36,6 +41,22 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
36
41
  }
37
42
  clean { delete 'classpath' }
38
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
39
60
  task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
40
61
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
41
62
  script "build/gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.7-bin.zip
@@ -20,19 +20,23 @@ import java.util.HashMap;
20
20
  import java.util.List;
21
21
  import java.util.Map;
22
22
 
23
- public class EmbulkWriteSupport extends WriteSupport<PageReader> {
23
+ public class EmbulkWriteSupport
24
+ extends WriteSupport<PageReader>
25
+ {
24
26
  final Schema schema;
25
27
  RecordConsumer consumer;
26
28
  WriteContext writeContext;
27
29
  TimestampFormatter[] timestampFormatters;
28
30
 
29
- public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters) {
31
+ public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
32
+ {
30
33
  this.schema = schema;
31
34
  this.timestampFormatters = timestampFormatters;
32
35
  }
33
36
 
34
37
  @Override
35
- public WriteContext init(Configuration configuration) {
38
+ public WriteContext init(Configuration configuration)
39
+ {
36
40
  if (writeContext == null) {
37
41
  init();
38
42
  }
@@ -40,12 +44,14 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
40
44
  }
41
45
 
42
46
  @Override
43
- public void prepareForWrite(RecordConsumer recordConsumer) {
47
+ public void prepareForWrite(RecordConsumer recordConsumer)
48
+ {
44
49
  this.consumer = recordConsumer;
45
50
  }
46
51
 
47
52
  @Override
48
- public void write(PageReader record) {
53
+ public void write(PageReader record)
54
+ {
49
55
  final ColumnVisitor visitor = new ParquetColumnVisitor(record, consumer);
50
56
  consumer.startMessage();
51
57
  for (Column c : schema.getColumns()) {
@@ -58,58 +64,68 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
58
64
  consumer.endMessage();
59
65
  }
60
66
 
61
- private void init() {
67
+ private void init()
68
+ {
62
69
  MessageType messageType = convertSchema(schema);
63
70
  Map<String, String> metadata = new HashMap<>();
64
71
  writeContext = new WriteContext(messageType, metadata);
65
72
  }
66
73
 
67
- private MessageType convertSchema(Schema schema) {
74
+ private MessageType convertSchema(Schema schema)
75
+ {
68
76
  SchemaConvertColumnVisitor visitor = new SchemaConvertColumnVisitor();
69
77
  schema.visitColumns(visitor);
70
78
  String messageName = "embulk";
71
79
  return new MessageType(messageName, visitor.getConvertedFields());
72
80
  }
73
81
 
74
- class ParquetColumnVisitor implements ColumnVisitor {
82
+ class ParquetColumnVisitor
83
+ implements ColumnVisitor
84
+ {
75
85
  final PageReader record;
76
86
  final RecordConsumer consumer;
77
87
 
78
- public ParquetColumnVisitor(PageReader record, RecordConsumer consumer) {
88
+ public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
89
+ {
79
90
  this.record = record;
80
91
  this.consumer = consumer;
81
92
  }
82
93
 
83
94
  @Override
84
- public void booleanColumn(Column column) {
95
+ public void booleanColumn(Column column)
96
+ {
85
97
  if (!record.isNull(column)) {
86
98
  consumer.addBoolean(record.getBoolean(column));
87
99
  }
88
100
  }
89
101
 
90
102
  @Override
91
- public void longColumn(Column column) {
103
+ public void longColumn(Column column)
104
+ {
92
105
  if (!record.isNull(column)) {
93
106
  consumer.addLong(record.getLong(column));
94
107
  }
95
108
  }
96
109
 
97
110
  @Override
98
- public void doubleColumn(Column column) {
111
+ public void doubleColumn(Column column)
112
+ {
99
113
  if (!record.isNull(column)) {
100
114
  consumer.addDouble(record.getDouble(column));
101
115
  }
102
116
  }
103
117
 
104
118
  @Override
105
- public void stringColumn(Column column) {
119
+ public void stringColumn(Column column)
120
+ {
106
121
  if (!record.isNull(column)) {
107
122
  consumer.addBinary(Binary.fromString(record.getString(column)));
108
123
  }
109
124
  }
110
125
 
111
126
  @Override
112
- public void timestampColumn(Column column) {
127
+ public void timestampColumn(Column column)
128
+ {
113
129
  if (!record.isNull(column)) {
114
130
  Timestamp t = record.getTimestamp(column);
115
131
  String formatted = timestampFormatters[column.getIndex()].format(t);
@@ -118,36 +134,44 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
118
134
  }
119
135
  }
120
136
 
121
- class SchemaConvertColumnVisitor implements ColumnVisitor {
137
+ class SchemaConvertColumnVisitor
138
+ implements ColumnVisitor
139
+ {
122
140
  List<Type> fields = new ArrayList<>();
123
141
 
124
142
  @Override
125
- public void booleanColumn(Column column) {
143
+ public void booleanColumn(Column column)
144
+ {
126
145
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName()));
127
146
  }
128
147
 
129
148
  @Override
130
- public void longColumn(Column column) {
149
+ public void longColumn(Column column)
150
+ {
131
151
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName()));
132
152
  }
133
153
 
134
154
  @Override
135
- public void doubleColumn(Column column) {
155
+ public void doubleColumn(Column column)
156
+ {
136
157
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName()));
137
158
  }
138
159
 
139
160
  @Override
140
- public void stringColumn(Column column) {
161
+ public void stringColumn(Column column)
162
+ {
141
163
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
142
164
  }
143
165
 
144
166
  @Override
145
- public void timestampColumn(Column column) {
167
+ public void timestampColumn(Column column)
168
+ {
146
169
  // formatted as string
147
170
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
148
171
  }
149
172
 
150
- public List<Type> getConvertedFields() {
173
+ public List<Type> getConvertedFields()
174
+ {
151
175
  return fields;
152
176
  }
153
177
  }
@@ -8,23 +8,28 @@ import org.embulk.spi.PageReader;
8
8
  import org.embulk.spi.Schema;
9
9
  import org.embulk.spi.time.TimestampFormatter;
10
10
 
11
- public class EmbulkWriterBuilder extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder> {
11
+ public class EmbulkWriterBuilder
12
+ extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder>
13
+ {
12
14
  final Schema schema;
13
15
  final TimestampFormatter[] timestampFormatters;
14
16
 
15
- public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters) {
17
+ public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
18
+ {
16
19
  super(file);
17
20
  this.schema = schema;
18
21
  this.timestampFormatters = timestampFormatters;
19
22
  }
20
23
 
21
24
  @Override
22
- protected EmbulkWriterBuilder self() {
25
+ protected EmbulkWriterBuilder self()
26
+ {
23
27
  return this;
24
28
  }
25
29
 
26
30
  @Override
27
- protected WriteSupport<PageReader> getWriteSupport(Configuration conf) {
31
+ protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
32
+ {
28
33
  return new EmbulkWriteSupport(schema, timestampFormatters);
29
34
  }
30
35
  }
@@ -30,9 +30,11 @@ import java.util.Map;
30
30
 
31
31
  @SuppressWarnings("unused")
32
32
  public class ParquetOutputPlugin
33
- implements OutputPlugin {
33
+ implements OutputPlugin
34
+ {
34
35
  public interface PluginTask
35
- extends Task, TimestampFormatter.Task {
36
+ extends Task, TimestampFormatter.Task
37
+ {
36
38
  @Config("path_prefix")
37
39
  String getPathPrefix();
38
40
 
@@ -72,12 +74,14 @@ public class ParquetOutputPlugin
72
74
  }
73
75
 
74
76
  public interface TimestampColumnOption
75
- extends Task, TimestampFormatter.TimestampColumnOption {
77
+ extends Task, TimestampFormatter.TimestampColumnOption
78
+ {
76
79
  }
77
80
 
78
81
  public ConfigDiff transaction(ConfigSource config,
79
- Schema schema, int processorCount,
80
- OutputPlugin.Control control) {
82
+ Schema schema, int processorCount,
83
+ OutputPlugin.Control control)
84
+ {
81
85
  PluginTask task = config.loadConfig(PluginTask.class);
82
86
 
83
87
  //TODO
@@ -87,18 +91,21 @@ public class ParquetOutputPlugin
87
91
  }
88
92
 
89
93
  public ConfigDiff resume(TaskSource taskSource,
90
- Schema schema, int processorCount,
91
- OutputPlugin.Control control) {
94
+ Schema schema, int processorCount,
95
+ OutputPlugin.Control control)
96
+ {
92
97
  throw new UnsupportedOperationException("parquet output plugin does not support resuming");
93
98
  }
94
99
 
95
100
  public void cleanup(TaskSource taskSource,
96
- Schema schema, int processorCount,
97
- List<TaskReport> successTaskReports) {
101
+ Schema schema, int processorCount,
102
+ List<TaskReport> successTaskReports)
103
+ {
98
104
  //TODO
99
105
  }
100
106
 
101
- public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex) {
107
+ public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
108
+ {
102
109
  PluginTask task = taskSource.loadTask(PluginTask.class);
103
110
 
104
111
  final PageReader reader = new PageReader(schema);
@@ -107,14 +114,16 @@ public class ParquetOutputPlugin
107
114
  return new ParquetTransactionalPageOutput(reader, writer);
108
115
  }
109
116
 
110
- private String buildPath(PluginTask task, int processorIndex) {
117
+ private String buildPath(PluginTask task, int processorIndex)
118
+ {
111
119
  final String pathPrefix = task.getPathPrefix();
112
120
  final String pathSuffix = task.getFileNameExtension();
113
121
  final String sequenceFormat = task.getSequenceFormat();
114
122
  return pathPrefix + String.format(sequenceFormat, processorIndex) + pathSuffix;
115
123
  }
116
124
 
117
- private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex) {
125
+ private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
126
+ {
118
127
  final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
119
128
 
120
129
  final Path path = new Path(buildPath(task, processorIndex));
@@ -138,13 +147,15 @@ public class ParquetOutputPlugin
138
147
  }
139
148
 
140
149
  writer = builder.build();
141
- } catch (IOException e) {
150
+ }
151
+ catch (IOException e) {
142
152
  Throwables.propagate(e);
143
153
  }
144
154
  return writer;
145
155
  }
146
156
 
147
- private Configuration createConfiguration(Map<String, String> extra) {
157
+ private Configuration createConfiguration(Map<String, String> extra)
158
+ {
148
159
  Configuration conf = new Configuration();
149
160
 
150
161
  // Default values
@@ -161,49 +172,59 @@ public class ParquetOutputPlugin
161
172
  return conf;
162
173
  }
163
174
 
164
- class ParquetTransactionalPageOutput implements TransactionalPageOutput {
175
+ class ParquetTransactionalPageOutput
176
+ implements TransactionalPageOutput
177
+ {
165
178
  private PageReader reader;
166
179
  private ParquetWriter<PageReader> writer;
167
180
 
168
- public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer) {
181
+ public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
182
+ {
169
183
  this.reader = reader;
170
184
  this.writer = writer;
171
185
  }
172
186
 
173
187
  @Override
174
- public void add(Page page) {
188
+ public void add(Page page)
189
+ {
175
190
  try {
176
191
  reader.setPage(page);
177
192
  while (reader.nextRecord()) {
178
193
  writer.write(reader);
179
194
  }
180
- } catch (IOException e) {
195
+ }
196
+ catch (IOException e) {
181
197
  Throwables.propagate(e);
182
198
  }
183
199
  }
184
200
 
185
201
  @Override
186
- public void finish() {
202
+ public void finish()
203
+ {
187
204
  try {
188
205
  writer.close();
189
206
  writer = null;
190
- } catch (IOException e) {
207
+ }
208
+ catch (IOException e) {
191
209
  Throwables.propagate(e);
192
210
  }
193
211
  }
194
212
 
195
213
  @Override
196
- public void close() {
214
+ public void close()
215
+ {
197
216
  //TODO
198
217
  }
199
218
 
200
219
  @Override
201
- public void abort() {
220
+ public void abort()
221
+ {
202
222
  //TODO
203
223
  }
204
224
 
205
225
  @Override
206
- public TaskReport commit() {
226
+ public TaskReport commit()
227
+ {
207
228
  return Exec.newTaskReport();
208
229
  //TODO
209
230
  }
@@ -10,17 +10,20 @@ import org.junit.Test;
10
10
 
11
11
  import java.lang.reflect.InvocationTargetException;
12
12
  import java.lang.reflect.Method;
13
- import java.util.HashMap;
14
13
  import java.util.Map;
15
14
 
16
- import static org.junit.Assert.*;
15
+ import static org.junit.Assert.assertEquals;
16
+ import static org.junit.Assert.assertFalse;
17
+ import static org.junit.Assert.assertTrue;
17
18
 
18
- public class ParquetOutputPluginTest {
19
+ public class ParquetOutputPluginTest
20
+ {
19
21
  @Rule
20
22
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
21
23
 
22
24
  @Test
23
- public void checkDefaultValues() {
25
+ public void checkDefaultValues()
26
+ {
24
27
  ConfigSource config = Exec.newConfigSource()
25
28
  .set("path_prefix", "test");
26
29
 
@@ -34,14 +37,17 @@ public class ParquetOutputPluginTest {
34
37
  }
35
38
 
36
39
  @Test(expected = ConfigException.class)
37
- public void checkColumnsRequired() {
40
+ public void checkColumnsRequired()
41
+ {
38
42
  ConfigSource config = Exec.newConfigSource();
39
43
 
40
44
  config.loadConfig(ParquetOutputPlugin.PluginTask.class);
41
45
  }
42
46
 
43
47
  @Test
44
- public void checkExtraConfigurations() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
48
+ public void checkExtraConfigurations()
49
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException
50
+ {
45
51
  ConfigSource map = Exec.newConfigSource()
46
52
  .set("foo", "bar");
47
53
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - OKUNO Akihiro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-04 00:00:00.000000000 Z
11
+ date: 2017-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -50,6 +50,8 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - gradle/wrapper/gradle-wrapper.jar
54
56
  - gradle/wrapper/gradle-wrapper.properties
55
57
  - gradlew
@@ -83,7 +85,7 @@ files:
83
85
  - classpath/curator-client-2.7.1.jar
84
86
  - classpath/curator-framework-2.7.1.jar
85
87
  - classpath/curator-recipes-2.7.1.jar
86
- - classpath/embulk-output-parquet-0.4.0.jar
88
+ - classpath/embulk-output-parquet-0.5.0.jar
87
89
  - classpath/gson-2.2.4.jar
88
90
  - classpath/hadoop-annotations-2.7.1.jar
89
91
  - classpath/hadoop-auth-2.7.1.jar
@@ -121,7 +123,7 @@ files:
121
123
  - classpath/jetty-6.1.26.jar
122
124
  - classpath/jetty-util-6.1.26.jar
123
125
  - classpath/jline-0.9.94.jar
124
- - classpath/joda-time-2.9.1.jar
126
+ - classpath/joda-time-2.9.9.jar
125
127
  - classpath/jsch-0.1.42.jar
126
128
  - classpath/jsp-api-2.1.jar
127
129
  - classpath/jsr305-3.0.0.jar