embulk-output-parquet 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 887eeba43ad66ae6159048504e253542162a8988
4
- data.tar.gz: a04dba91a7abeecc957d7265f809c9bf1276ae0e
3
+ metadata.gz: d1ef34fa1ab2ea085e926b70700d4bff09e7bb16
4
+ data.tar.gz: d2d9df28c5ed603995193552104466da98d400eb
5
5
  SHA512:
6
- metadata.gz: 56ec1d8ad587e73a97ad769bbaad04793bb28ccf9119c638cde85e6dd38e06626fe8343ecfa9cc2e497cec78724366c2f3d8fdd7656b892c44ad32d9c4d65718
7
- data.tar.gz: a99aa07e1b1507c6d375c4d2a21fa6d99341bf72f4022059eda90c88900f7597f687be5baed3c26728187212f59028e8679906fc8f6bba02795e1927f5e1ddcd
6
+ metadata.gz: 6a48e6ac6438c1cd56bf431b69ea8e980a54bff54290299d2da7a733c8defe4746230ba7afd2c5446b2d2ce8d42aa7cf74b6ca95c6dd8473cfefa278b891813f
7
+ data.tar.gz: adcfe86af5337ab4f41b2eb78ae84a2e20f71d40ad63652029d83cb50368ba40139f59f9f38a1bd8a7e7425c6838f96e39f733bbda6230ca9e37c1ae17b09f80
data/README.md CHANGED
@@ -24,14 +24,21 @@ http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
24
24
 
25
25
  ## Example
26
26
 
27
+ ```yaml
28
+ out:
29
+ type: parquet
30
+ path_prefix: file:///data/output
31
+ ```
32
+
33
+ ### How to write parquet files into S3
34
+
27
35
  ```yaml
28
36
  out:
29
37
  type: parquet
30
38
  path_prefix: s3a://bucket/keys
31
- extra_configuration:
39
+ extra_configurations:
32
40
  fs.s3a.access.key: 'your_access_key'
33
41
  fs.s3a.secret.key: 'your_secret_access_key'
34
-
35
42
  ```
36
43
 
37
44
  ## Build
data/build.gradle CHANGED
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  }
6
7
  import com.github.jrubygradle.JRubyExec
7
8
  repositories {
@@ -13,7 +14,11 @@ configurations {
13
14
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
14
15
  }
15
16
 
16
- version = "0.4.0"
17
+ version = "0.5.0"
18
+
19
+ sourceCompatibility = 1.7
20
+
21
+ targetCompatibility = 1.7
17
22
 
18
23
  dependencies {
19
24
  compile "org.embulk:embulk-core:0.7.10"
@@ -36,6 +41,22 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
36
41
  }
37
42
  clean { delete 'classpath' }
38
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
39
60
  task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
40
61
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
41
62
  script "build/gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.7-bin.zip
@@ -20,19 +20,23 @@ import java.util.HashMap;
20
20
  import java.util.List;
21
21
  import java.util.Map;
22
22
 
23
- public class EmbulkWriteSupport extends WriteSupport<PageReader> {
23
+ public class EmbulkWriteSupport
24
+ extends WriteSupport<PageReader>
25
+ {
24
26
  final Schema schema;
25
27
  RecordConsumer consumer;
26
28
  WriteContext writeContext;
27
29
  TimestampFormatter[] timestampFormatters;
28
30
 
29
- public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters) {
31
+ public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
32
+ {
30
33
  this.schema = schema;
31
34
  this.timestampFormatters = timestampFormatters;
32
35
  }
33
36
 
34
37
  @Override
35
- public WriteContext init(Configuration configuration) {
38
+ public WriteContext init(Configuration configuration)
39
+ {
36
40
  if (writeContext == null) {
37
41
  init();
38
42
  }
@@ -40,12 +44,14 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
40
44
  }
41
45
 
42
46
  @Override
43
- public void prepareForWrite(RecordConsumer recordConsumer) {
47
+ public void prepareForWrite(RecordConsumer recordConsumer)
48
+ {
44
49
  this.consumer = recordConsumer;
45
50
  }
46
51
 
47
52
  @Override
48
- public void write(PageReader record) {
53
+ public void write(PageReader record)
54
+ {
49
55
  final ColumnVisitor visitor = new ParquetColumnVisitor(record, consumer);
50
56
  consumer.startMessage();
51
57
  for (Column c : schema.getColumns()) {
@@ -58,58 +64,68 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
58
64
  consumer.endMessage();
59
65
  }
60
66
 
61
- private void init() {
67
+ private void init()
68
+ {
62
69
  MessageType messageType = convertSchema(schema);
63
70
  Map<String, String> metadata = new HashMap<>();
64
71
  writeContext = new WriteContext(messageType, metadata);
65
72
  }
66
73
 
67
- private MessageType convertSchema(Schema schema) {
74
+ private MessageType convertSchema(Schema schema)
75
+ {
68
76
  SchemaConvertColumnVisitor visitor = new SchemaConvertColumnVisitor();
69
77
  schema.visitColumns(visitor);
70
78
  String messageName = "embulk";
71
79
  return new MessageType(messageName, visitor.getConvertedFields());
72
80
  }
73
81
 
74
- class ParquetColumnVisitor implements ColumnVisitor {
82
+ class ParquetColumnVisitor
83
+ implements ColumnVisitor
84
+ {
75
85
  final PageReader record;
76
86
  final RecordConsumer consumer;
77
87
 
78
- public ParquetColumnVisitor(PageReader record, RecordConsumer consumer) {
88
+ public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
89
+ {
79
90
  this.record = record;
80
91
  this.consumer = consumer;
81
92
  }
82
93
 
83
94
  @Override
84
- public void booleanColumn(Column column) {
95
+ public void booleanColumn(Column column)
96
+ {
85
97
  if (!record.isNull(column)) {
86
98
  consumer.addBoolean(record.getBoolean(column));
87
99
  }
88
100
  }
89
101
 
90
102
  @Override
91
- public void longColumn(Column column) {
103
+ public void longColumn(Column column)
104
+ {
92
105
  if (!record.isNull(column)) {
93
106
  consumer.addLong(record.getLong(column));
94
107
  }
95
108
  }
96
109
 
97
110
  @Override
98
- public void doubleColumn(Column column) {
111
+ public void doubleColumn(Column column)
112
+ {
99
113
  if (!record.isNull(column)) {
100
114
  consumer.addDouble(record.getDouble(column));
101
115
  }
102
116
  }
103
117
 
104
118
  @Override
105
- public void stringColumn(Column column) {
119
+ public void stringColumn(Column column)
120
+ {
106
121
  if (!record.isNull(column)) {
107
122
  consumer.addBinary(Binary.fromString(record.getString(column)));
108
123
  }
109
124
  }
110
125
 
111
126
  @Override
112
- public void timestampColumn(Column column) {
127
+ public void timestampColumn(Column column)
128
+ {
113
129
  if (!record.isNull(column)) {
114
130
  Timestamp t = record.getTimestamp(column);
115
131
  String formatted = timestampFormatters[column.getIndex()].format(t);
@@ -118,36 +134,44 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
118
134
  }
119
135
  }
120
136
 
121
- class SchemaConvertColumnVisitor implements ColumnVisitor {
137
+ class SchemaConvertColumnVisitor
138
+ implements ColumnVisitor
139
+ {
122
140
  List<Type> fields = new ArrayList<>();
123
141
 
124
142
  @Override
125
- public void booleanColumn(Column column) {
143
+ public void booleanColumn(Column column)
144
+ {
126
145
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName()));
127
146
  }
128
147
 
129
148
  @Override
130
- public void longColumn(Column column) {
149
+ public void longColumn(Column column)
150
+ {
131
151
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName()));
132
152
  }
133
153
 
134
154
  @Override
135
- public void doubleColumn(Column column) {
155
+ public void doubleColumn(Column column)
156
+ {
136
157
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName()));
137
158
  }
138
159
 
139
160
  @Override
140
- public void stringColumn(Column column) {
161
+ public void stringColumn(Column column)
162
+ {
141
163
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
142
164
  }
143
165
 
144
166
  @Override
145
- public void timestampColumn(Column column) {
167
+ public void timestampColumn(Column column)
168
+ {
146
169
  // formatted as string
147
170
  fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
148
171
  }
149
172
 
150
- public List<Type> getConvertedFields() {
173
+ public List<Type> getConvertedFields()
174
+ {
151
175
  return fields;
152
176
  }
153
177
  }
@@ -8,23 +8,28 @@ import org.embulk.spi.PageReader;
8
8
  import org.embulk.spi.Schema;
9
9
  import org.embulk.spi.time.TimestampFormatter;
10
10
 
11
- public class EmbulkWriterBuilder extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder> {
11
+ public class EmbulkWriterBuilder
12
+ extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder>
13
+ {
12
14
  final Schema schema;
13
15
  final TimestampFormatter[] timestampFormatters;
14
16
 
15
- public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters) {
17
+ public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
18
+ {
16
19
  super(file);
17
20
  this.schema = schema;
18
21
  this.timestampFormatters = timestampFormatters;
19
22
  }
20
23
 
21
24
  @Override
22
- protected EmbulkWriterBuilder self() {
25
+ protected EmbulkWriterBuilder self()
26
+ {
23
27
  return this;
24
28
  }
25
29
 
26
30
  @Override
27
- protected WriteSupport<PageReader> getWriteSupport(Configuration conf) {
31
+ protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
32
+ {
28
33
  return new EmbulkWriteSupport(schema, timestampFormatters);
29
34
  }
30
35
  }
@@ -30,9 +30,11 @@ import java.util.Map;
30
30
 
31
31
  @SuppressWarnings("unused")
32
32
  public class ParquetOutputPlugin
33
- implements OutputPlugin {
33
+ implements OutputPlugin
34
+ {
34
35
  public interface PluginTask
35
- extends Task, TimestampFormatter.Task {
36
+ extends Task, TimestampFormatter.Task
37
+ {
36
38
  @Config("path_prefix")
37
39
  String getPathPrefix();
38
40
 
@@ -72,12 +74,14 @@ public class ParquetOutputPlugin
72
74
  }
73
75
 
74
76
  public interface TimestampColumnOption
75
- extends Task, TimestampFormatter.TimestampColumnOption {
77
+ extends Task, TimestampFormatter.TimestampColumnOption
78
+ {
76
79
  }
77
80
 
78
81
  public ConfigDiff transaction(ConfigSource config,
79
- Schema schema, int processorCount,
80
- OutputPlugin.Control control) {
82
+ Schema schema, int processorCount,
83
+ OutputPlugin.Control control)
84
+ {
81
85
  PluginTask task = config.loadConfig(PluginTask.class);
82
86
 
83
87
  //TODO
@@ -87,18 +91,21 @@ public class ParquetOutputPlugin
87
91
  }
88
92
 
89
93
  public ConfigDiff resume(TaskSource taskSource,
90
- Schema schema, int processorCount,
91
- OutputPlugin.Control control) {
94
+ Schema schema, int processorCount,
95
+ OutputPlugin.Control control)
96
+ {
92
97
  throw new UnsupportedOperationException("parquet output plugin does not support resuming");
93
98
  }
94
99
 
95
100
  public void cleanup(TaskSource taskSource,
96
- Schema schema, int processorCount,
97
- List<TaskReport> successTaskReports) {
101
+ Schema schema, int processorCount,
102
+ List<TaskReport> successTaskReports)
103
+ {
98
104
  //TODO
99
105
  }
100
106
 
101
- public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex) {
107
+ public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
108
+ {
102
109
  PluginTask task = taskSource.loadTask(PluginTask.class);
103
110
 
104
111
  final PageReader reader = new PageReader(schema);
@@ -107,14 +114,16 @@ public class ParquetOutputPlugin
107
114
  return new ParquetTransactionalPageOutput(reader, writer);
108
115
  }
109
116
 
110
- private String buildPath(PluginTask task, int processorIndex) {
117
+ private String buildPath(PluginTask task, int processorIndex)
118
+ {
111
119
  final String pathPrefix = task.getPathPrefix();
112
120
  final String pathSuffix = task.getFileNameExtension();
113
121
  final String sequenceFormat = task.getSequenceFormat();
114
122
  return pathPrefix + String.format(sequenceFormat, processorIndex) + pathSuffix;
115
123
  }
116
124
 
117
- private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex) {
125
+ private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
126
+ {
118
127
  final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
119
128
 
120
129
  final Path path = new Path(buildPath(task, processorIndex));
@@ -138,13 +147,15 @@ public class ParquetOutputPlugin
138
147
  }
139
148
 
140
149
  writer = builder.build();
141
- } catch (IOException e) {
150
+ }
151
+ catch (IOException e) {
142
152
  Throwables.propagate(e);
143
153
  }
144
154
  return writer;
145
155
  }
146
156
 
147
- private Configuration createConfiguration(Map<String, String> extra) {
157
+ private Configuration createConfiguration(Map<String, String> extra)
158
+ {
148
159
  Configuration conf = new Configuration();
149
160
 
150
161
  // Default values
@@ -161,49 +172,59 @@ public class ParquetOutputPlugin
161
172
  return conf;
162
173
  }
163
174
 
164
- class ParquetTransactionalPageOutput implements TransactionalPageOutput {
175
+ class ParquetTransactionalPageOutput
176
+ implements TransactionalPageOutput
177
+ {
165
178
  private PageReader reader;
166
179
  private ParquetWriter<PageReader> writer;
167
180
 
168
- public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer) {
181
+ public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
182
+ {
169
183
  this.reader = reader;
170
184
  this.writer = writer;
171
185
  }
172
186
 
173
187
  @Override
174
- public void add(Page page) {
188
+ public void add(Page page)
189
+ {
175
190
  try {
176
191
  reader.setPage(page);
177
192
  while (reader.nextRecord()) {
178
193
  writer.write(reader);
179
194
  }
180
- } catch (IOException e) {
195
+ }
196
+ catch (IOException e) {
181
197
  Throwables.propagate(e);
182
198
  }
183
199
  }
184
200
 
185
201
  @Override
186
- public void finish() {
202
+ public void finish()
203
+ {
187
204
  try {
188
205
  writer.close();
189
206
  writer = null;
190
- } catch (IOException e) {
207
+ }
208
+ catch (IOException e) {
191
209
  Throwables.propagate(e);
192
210
  }
193
211
  }
194
212
 
195
213
  @Override
196
- public void close() {
214
+ public void close()
215
+ {
197
216
  //TODO
198
217
  }
199
218
 
200
219
  @Override
201
- public void abort() {
220
+ public void abort()
221
+ {
202
222
  //TODO
203
223
  }
204
224
 
205
225
  @Override
206
- public TaskReport commit() {
226
+ public TaskReport commit()
227
+ {
207
228
  return Exec.newTaskReport();
208
229
  //TODO
209
230
  }
@@ -10,17 +10,20 @@ import org.junit.Test;
10
10
 
11
11
  import java.lang.reflect.InvocationTargetException;
12
12
  import java.lang.reflect.Method;
13
- import java.util.HashMap;
14
13
  import java.util.Map;
15
14
 
16
- import static org.junit.Assert.*;
15
+ import static org.junit.Assert.assertEquals;
16
+ import static org.junit.Assert.assertFalse;
17
+ import static org.junit.Assert.assertTrue;
17
18
 
18
- public class ParquetOutputPluginTest {
19
+ public class ParquetOutputPluginTest
20
+ {
19
21
  @Rule
20
22
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
21
23
 
22
24
  @Test
23
- public void checkDefaultValues() {
25
+ public void checkDefaultValues()
26
+ {
24
27
  ConfigSource config = Exec.newConfigSource()
25
28
  .set("path_prefix", "test");
26
29
 
@@ -34,14 +37,17 @@ public class ParquetOutputPluginTest {
34
37
  }
35
38
 
36
39
  @Test(expected = ConfigException.class)
37
- public void checkColumnsRequired() {
40
+ public void checkColumnsRequired()
41
+ {
38
42
  ConfigSource config = Exec.newConfigSource();
39
43
 
40
44
  config.loadConfig(ParquetOutputPlugin.PluginTask.class);
41
45
  }
42
46
 
43
47
  @Test
44
- public void checkExtraConfigurations() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
48
+ public void checkExtraConfigurations()
49
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException
50
+ {
45
51
  ConfigSource map = Exec.newConfigSource()
46
52
  .set("foo", "bar");
47
53
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - OKUNO Akihiro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-04 00:00:00.000000000 Z
11
+ date: 2017-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -50,6 +50,8 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - gradle/wrapper/gradle-wrapper.jar
54
56
  - gradle/wrapper/gradle-wrapper.properties
55
57
  - gradlew
@@ -83,7 +85,7 @@ files:
83
85
  - classpath/curator-client-2.7.1.jar
84
86
  - classpath/curator-framework-2.7.1.jar
85
87
  - classpath/curator-recipes-2.7.1.jar
86
- - classpath/embulk-output-parquet-0.4.0.jar
88
+ - classpath/embulk-output-parquet-0.5.0.jar
87
89
  - classpath/gson-2.2.4.jar
88
90
  - classpath/hadoop-annotations-2.7.1.jar
89
91
  - classpath/hadoop-auth-2.7.1.jar
@@ -121,7 +123,7 @@ files:
121
123
  - classpath/jetty-6.1.26.jar
122
124
  - classpath/jetty-util-6.1.26.jar
123
125
  - classpath/jline-0.9.94.jar
124
- - classpath/joda-time-2.9.1.jar
126
+ - classpath/joda-time-2.9.9.jar
125
127
  - classpath/jsch-0.1.42.jar
126
128
  - classpath/jsp-api-2.1.jar
127
129
  - classpath/jsr305-3.0.0.jar