embulk-output-parquet 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/build.gradle +22 -1
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -1
- data/src/main/java/org/embulk/output/EmbulkWriteSupport.java +45 -21
- data/src/main/java/org/embulk/output/EmbulkWriterBuilder.java +9 -4
- data/src/main/java/org/embulk/output/ParquetOutputPlugin.java +44 -23
- data/src/test/java/org/embulk/output/ParquetOutputPluginTest.java +12 -6
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1ef34fa1ab2ea085e926b70700d4bff09e7bb16
|
4
|
+
data.tar.gz: d2d9df28c5ed603995193552104466da98d400eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a48e6ac6438c1cd56bf431b69ea8e980a54bff54290299d2da7a733c8defe4746230ba7afd2c5446b2d2ce8d42aa7cf74b6ca95c6dd8473cfefa278b891813f
|
7
|
+
data.tar.gz: adcfe86af5337ab4f41b2eb78ae84a2e20f71d40ad63652029d83cb50368ba40139f59f9f38a1bd8a7e7425c6838f96e39f733bbda6230ca9e37c1ae17b09f80
|
data/README.md
CHANGED
@@ -24,14 +24,21 @@ http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
|
|
24
24
|
|
25
25
|
## Example
|
26
26
|
|
27
|
+
```yaml
|
28
|
+
out:
|
29
|
+
type: parquet
|
30
|
+
path_prefix: file:///data/output
|
31
|
+
```
|
32
|
+
|
33
|
+
### How to write parquet files into S3
|
34
|
+
|
27
35
|
```yaml
|
28
36
|
out:
|
29
37
|
type: parquet
|
30
38
|
path_prefix: s3a://bucket/keys
|
31
|
-
|
39
|
+
extra_configurations:
|
32
40
|
fs.s3a.access.key: 'your_access_key'
|
33
41
|
fs.s3a.secret.key: 'your_secret_access_key'
|
34
|
-
|
35
42
|
```
|
36
43
|
|
37
44
|
## Build
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
}
|
6
7
|
import com.github.jrubygradle.JRubyExec
|
7
8
|
repositories {
|
@@ -13,7 +14,11 @@ configurations {
|
|
13
14
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.4.0"
|
17
|
+
version = "0.5.0"
|
18
|
+
|
19
|
+
sourceCompatibility = 1.7
|
20
|
+
|
21
|
+
targetCompatibility = 1.7
|
17
22
|
|
18
23
|
dependencies {
|
19
24
|
compile "org.embulk:embulk-core:0.7.10"
|
@@ -36,6 +41,22 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
36
41
|
}
|
37
42
|
clean { delete 'classpath' }
|
38
43
|
|
44
|
+
checkstyle {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
46
|
+
toolVersion = '6.14.1'
|
47
|
+
}
|
48
|
+
checkstyleMain {
|
49
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
50
|
+
ignoreFailures = true
|
51
|
+
}
|
52
|
+
checkstyleTest {
|
53
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
54
|
+
ignoreFailures = true
|
55
|
+
}
|
56
|
+
task checkstyle(type: Checkstyle) {
|
57
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
58
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
59
|
+
}
|
39
60
|
task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
|
40
61
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
41
62
|
script "build/gemspec"
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
@@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
|
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.7-bin.zip
|
@@ -20,19 +20,23 @@ import java.util.HashMap;
|
|
20
20
|
import java.util.List;
|
21
21
|
import java.util.Map;
|
22
22
|
|
23
|
-
public class EmbulkWriteSupport
|
23
|
+
public class EmbulkWriteSupport
|
24
|
+
extends WriteSupport<PageReader>
|
25
|
+
{
|
24
26
|
final Schema schema;
|
25
27
|
RecordConsumer consumer;
|
26
28
|
WriteContext writeContext;
|
27
29
|
TimestampFormatter[] timestampFormatters;
|
28
30
|
|
29
|
-
public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
|
31
|
+
public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
|
32
|
+
{
|
30
33
|
this.schema = schema;
|
31
34
|
this.timestampFormatters = timestampFormatters;
|
32
35
|
}
|
33
36
|
|
34
37
|
@Override
|
35
|
-
public WriteContext init(Configuration configuration)
|
38
|
+
public WriteContext init(Configuration configuration)
|
39
|
+
{
|
36
40
|
if (writeContext == null) {
|
37
41
|
init();
|
38
42
|
}
|
@@ -40,12 +44,14 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
40
44
|
}
|
41
45
|
|
42
46
|
@Override
|
43
|
-
public void prepareForWrite(RecordConsumer recordConsumer)
|
47
|
+
public void prepareForWrite(RecordConsumer recordConsumer)
|
48
|
+
{
|
44
49
|
this.consumer = recordConsumer;
|
45
50
|
}
|
46
51
|
|
47
52
|
@Override
|
48
|
-
public void write(PageReader record)
|
53
|
+
public void write(PageReader record)
|
54
|
+
{
|
49
55
|
final ColumnVisitor visitor = new ParquetColumnVisitor(record, consumer);
|
50
56
|
consumer.startMessage();
|
51
57
|
for (Column c : schema.getColumns()) {
|
@@ -58,58 +64,68 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
58
64
|
consumer.endMessage();
|
59
65
|
}
|
60
66
|
|
61
|
-
private void init()
|
67
|
+
private void init()
|
68
|
+
{
|
62
69
|
MessageType messageType = convertSchema(schema);
|
63
70
|
Map<String, String> metadata = new HashMap<>();
|
64
71
|
writeContext = new WriteContext(messageType, metadata);
|
65
72
|
}
|
66
73
|
|
67
|
-
private MessageType convertSchema(Schema schema)
|
74
|
+
private MessageType convertSchema(Schema schema)
|
75
|
+
{
|
68
76
|
SchemaConvertColumnVisitor visitor = new SchemaConvertColumnVisitor();
|
69
77
|
schema.visitColumns(visitor);
|
70
78
|
String messageName = "embulk";
|
71
79
|
return new MessageType(messageName, visitor.getConvertedFields());
|
72
80
|
}
|
73
81
|
|
74
|
-
class ParquetColumnVisitor
|
82
|
+
class ParquetColumnVisitor
|
83
|
+
implements ColumnVisitor
|
84
|
+
{
|
75
85
|
final PageReader record;
|
76
86
|
final RecordConsumer consumer;
|
77
87
|
|
78
|
-
public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
|
88
|
+
public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
|
89
|
+
{
|
79
90
|
this.record = record;
|
80
91
|
this.consumer = consumer;
|
81
92
|
}
|
82
93
|
|
83
94
|
@Override
|
84
|
-
public void booleanColumn(Column column)
|
95
|
+
public void booleanColumn(Column column)
|
96
|
+
{
|
85
97
|
if (!record.isNull(column)) {
|
86
98
|
consumer.addBoolean(record.getBoolean(column));
|
87
99
|
}
|
88
100
|
}
|
89
101
|
|
90
102
|
@Override
|
91
|
-
public void longColumn(Column column)
|
103
|
+
public void longColumn(Column column)
|
104
|
+
{
|
92
105
|
if (!record.isNull(column)) {
|
93
106
|
consumer.addLong(record.getLong(column));
|
94
107
|
}
|
95
108
|
}
|
96
109
|
|
97
110
|
@Override
|
98
|
-
public void doubleColumn(Column column)
|
111
|
+
public void doubleColumn(Column column)
|
112
|
+
{
|
99
113
|
if (!record.isNull(column)) {
|
100
114
|
consumer.addDouble(record.getDouble(column));
|
101
115
|
}
|
102
116
|
}
|
103
117
|
|
104
118
|
@Override
|
105
|
-
public void stringColumn(Column column)
|
119
|
+
public void stringColumn(Column column)
|
120
|
+
{
|
106
121
|
if (!record.isNull(column)) {
|
107
122
|
consumer.addBinary(Binary.fromString(record.getString(column)));
|
108
123
|
}
|
109
124
|
}
|
110
125
|
|
111
126
|
@Override
|
112
|
-
public void timestampColumn(Column column)
|
127
|
+
public void timestampColumn(Column column)
|
128
|
+
{
|
113
129
|
if (!record.isNull(column)) {
|
114
130
|
Timestamp t = record.getTimestamp(column);
|
115
131
|
String formatted = timestampFormatters[column.getIndex()].format(t);
|
@@ -118,36 +134,44 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
118
134
|
}
|
119
135
|
}
|
120
136
|
|
121
|
-
class SchemaConvertColumnVisitor
|
137
|
+
class SchemaConvertColumnVisitor
|
138
|
+
implements ColumnVisitor
|
139
|
+
{
|
122
140
|
List<Type> fields = new ArrayList<>();
|
123
141
|
|
124
142
|
@Override
|
125
|
-
public void booleanColumn(Column column)
|
143
|
+
public void booleanColumn(Column column)
|
144
|
+
{
|
126
145
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName()));
|
127
146
|
}
|
128
147
|
|
129
148
|
@Override
|
130
|
-
public void longColumn(Column column)
|
149
|
+
public void longColumn(Column column)
|
150
|
+
{
|
131
151
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName()));
|
132
152
|
}
|
133
153
|
|
134
154
|
@Override
|
135
|
-
public void doubleColumn(Column column)
|
155
|
+
public void doubleColumn(Column column)
|
156
|
+
{
|
136
157
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName()));
|
137
158
|
}
|
138
159
|
|
139
160
|
@Override
|
140
|
-
public void stringColumn(Column column)
|
161
|
+
public void stringColumn(Column column)
|
162
|
+
{
|
141
163
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
|
142
164
|
}
|
143
165
|
|
144
166
|
@Override
|
145
|
-
public void timestampColumn(Column column)
|
167
|
+
public void timestampColumn(Column column)
|
168
|
+
{
|
146
169
|
// formatted as string
|
147
170
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
|
148
171
|
}
|
149
172
|
|
150
|
-
public List<Type> getConvertedFields()
|
173
|
+
public List<Type> getConvertedFields()
|
174
|
+
{
|
151
175
|
return fields;
|
152
176
|
}
|
153
177
|
}
|
@@ -8,23 +8,28 @@ import org.embulk.spi.PageReader;
|
|
8
8
|
import org.embulk.spi.Schema;
|
9
9
|
import org.embulk.spi.time.TimestampFormatter;
|
10
10
|
|
11
|
-
public class EmbulkWriterBuilder
|
11
|
+
public class EmbulkWriterBuilder
|
12
|
+
extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder>
|
13
|
+
{
|
12
14
|
final Schema schema;
|
13
15
|
final TimestampFormatter[] timestampFormatters;
|
14
16
|
|
15
|
-
public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
|
17
|
+
public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
|
18
|
+
{
|
16
19
|
super(file);
|
17
20
|
this.schema = schema;
|
18
21
|
this.timestampFormatters = timestampFormatters;
|
19
22
|
}
|
20
23
|
|
21
24
|
@Override
|
22
|
-
protected EmbulkWriterBuilder self()
|
25
|
+
protected EmbulkWriterBuilder self()
|
26
|
+
{
|
23
27
|
return this;
|
24
28
|
}
|
25
29
|
|
26
30
|
@Override
|
27
|
-
protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
|
31
|
+
protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
|
32
|
+
{
|
28
33
|
return new EmbulkWriteSupport(schema, timestampFormatters);
|
29
34
|
}
|
30
35
|
}
|
@@ -30,9 +30,11 @@ import java.util.Map;
|
|
30
30
|
|
31
31
|
@SuppressWarnings("unused")
|
32
32
|
public class ParquetOutputPlugin
|
33
|
-
implements OutputPlugin
|
33
|
+
implements OutputPlugin
|
34
|
+
{
|
34
35
|
public interface PluginTask
|
35
|
-
extends Task, TimestampFormatter.Task
|
36
|
+
extends Task, TimestampFormatter.Task
|
37
|
+
{
|
36
38
|
@Config("path_prefix")
|
37
39
|
String getPathPrefix();
|
38
40
|
|
@@ -72,12 +74,14 @@ public class ParquetOutputPlugin
|
|
72
74
|
}
|
73
75
|
|
74
76
|
public interface TimestampColumnOption
|
75
|
-
extends Task, TimestampFormatter.TimestampColumnOption
|
77
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
78
|
+
{
|
76
79
|
}
|
77
80
|
|
78
81
|
public ConfigDiff transaction(ConfigSource config,
|
79
|
-
|
80
|
-
|
82
|
+
Schema schema, int processorCount,
|
83
|
+
OutputPlugin.Control control)
|
84
|
+
{
|
81
85
|
PluginTask task = config.loadConfig(PluginTask.class);
|
82
86
|
|
83
87
|
//TODO
|
@@ -87,18 +91,21 @@ public class ParquetOutputPlugin
|
|
87
91
|
}
|
88
92
|
|
89
93
|
public ConfigDiff resume(TaskSource taskSource,
|
90
|
-
|
91
|
-
|
94
|
+
Schema schema, int processorCount,
|
95
|
+
OutputPlugin.Control control)
|
96
|
+
{
|
92
97
|
throw new UnsupportedOperationException("parquet output plugin does not support resuming");
|
93
98
|
}
|
94
99
|
|
95
100
|
public void cleanup(TaskSource taskSource,
|
96
|
-
|
97
|
-
|
101
|
+
Schema schema, int processorCount,
|
102
|
+
List<TaskReport> successTaskReports)
|
103
|
+
{
|
98
104
|
//TODO
|
99
105
|
}
|
100
106
|
|
101
|
-
public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
|
107
|
+
public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
|
108
|
+
{
|
102
109
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
103
110
|
|
104
111
|
final PageReader reader = new PageReader(schema);
|
@@ -107,14 +114,16 @@ public class ParquetOutputPlugin
|
|
107
114
|
return new ParquetTransactionalPageOutput(reader, writer);
|
108
115
|
}
|
109
116
|
|
110
|
-
private String buildPath(PluginTask task, int processorIndex)
|
117
|
+
private String buildPath(PluginTask task, int processorIndex)
|
118
|
+
{
|
111
119
|
final String pathPrefix = task.getPathPrefix();
|
112
120
|
final String pathSuffix = task.getFileNameExtension();
|
113
121
|
final String sequenceFormat = task.getSequenceFormat();
|
114
122
|
return pathPrefix + String.format(sequenceFormat, processorIndex) + pathSuffix;
|
115
123
|
}
|
116
124
|
|
117
|
-
private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
|
125
|
+
private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
|
126
|
+
{
|
118
127
|
final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
119
128
|
|
120
129
|
final Path path = new Path(buildPath(task, processorIndex));
|
@@ -138,13 +147,15 @@ public class ParquetOutputPlugin
|
|
138
147
|
}
|
139
148
|
|
140
149
|
writer = builder.build();
|
141
|
-
}
|
150
|
+
}
|
151
|
+
catch (IOException e) {
|
142
152
|
Throwables.propagate(e);
|
143
153
|
}
|
144
154
|
return writer;
|
145
155
|
}
|
146
156
|
|
147
|
-
private Configuration createConfiguration(Map<String, String> extra)
|
157
|
+
private Configuration createConfiguration(Map<String, String> extra)
|
158
|
+
{
|
148
159
|
Configuration conf = new Configuration();
|
149
160
|
|
150
161
|
// Default values
|
@@ -161,49 +172,59 @@ public class ParquetOutputPlugin
|
|
161
172
|
return conf;
|
162
173
|
}
|
163
174
|
|
164
|
-
class ParquetTransactionalPageOutput
|
175
|
+
class ParquetTransactionalPageOutput
|
176
|
+
implements TransactionalPageOutput
|
177
|
+
{
|
165
178
|
private PageReader reader;
|
166
179
|
private ParquetWriter<PageReader> writer;
|
167
180
|
|
168
|
-
public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
|
181
|
+
public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
|
182
|
+
{
|
169
183
|
this.reader = reader;
|
170
184
|
this.writer = writer;
|
171
185
|
}
|
172
186
|
|
173
187
|
@Override
|
174
|
-
public void add(Page page)
|
188
|
+
public void add(Page page)
|
189
|
+
{
|
175
190
|
try {
|
176
191
|
reader.setPage(page);
|
177
192
|
while (reader.nextRecord()) {
|
178
193
|
writer.write(reader);
|
179
194
|
}
|
180
|
-
}
|
195
|
+
}
|
196
|
+
catch (IOException e) {
|
181
197
|
Throwables.propagate(e);
|
182
198
|
}
|
183
199
|
}
|
184
200
|
|
185
201
|
@Override
|
186
|
-
public void finish()
|
202
|
+
public void finish()
|
203
|
+
{
|
187
204
|
try {
|
188
205
|
writer.close();
|
189
206
|
writer = null;
|
190
|
-
}
|
207
|
+
}
|
208
|
+
catch (IOException e) {
|
191
209
|
Throwables.propagate(e);
|
192
210
|
}
|
193
211
|
}
|
194
212
|
|
195
213
|
@Override
|
196
|
-
public void close()
|
214
|
+
public void close()
|
215
|
+
{
|
197
216
|
//TODO
|
198
217
|
}
|
199
218
|
|
200
219
|
@Override
|
201
|
-
public void abort()
|
220
|
+
public void abort()
|
221
|
+
{
|
202
222
|
//TODO
|
203
223
|
}
|
204
224
|
|
205
225
|
@Override
|
206
|
-
public TaskReport commit()
|
226
|
+
public TaskReport commit()
|
227
|
+
{
|
207
228
|
return Exec.newTaskReport();
|
208
229
|
//TODO
|
209
230
|
}
|
@@ -10,17 +10,20 @@ import org.junit.Test;
|
|
10
10
|
|
11
11
|
import java.lang.reflect.InvocationTargetException;
|
12
12
|
import java.lang.reflect.Method;
|
13
|
-
import java.util.HashMap;
|
14
13
|
import java.util.Map;
|
15
14
|
|
16
|
-
import static org.junit.Assert.*;
|
15
|
+
import static org.junit.Assert.assertEquals;
|
16
|
+
import static org.junit.Assert.assertFalse;
|
17
|
+
import static org.junit.Assert.assertTrue;
|
17
18
|
|
18
|
-
public class ParquetOutputPluginTest
|
19
|
+
public class ParquetOutputPluginTest
|
20
|
+
{
|
19
21
|
@Rule
|
20
22
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
21
23
|
|
22
24
|
@Test
|
23
|
-
public void checkDefaultValues()
|
25
|
+
public void checkDefaultValues()
|
26
|
+
{
|
24
27
|
ConfigSource config = Exec.newConfigSource()
|
25
28
|
.set("path_prefix", "test");
|
26
29
|
|
@@ -34,14 +37,17 @@ public class ParquetOutputPluginTest {
|
|
34
37
|
}
|
35
38
|
|
36
39
|
@Test(expected = ConfigException.class)
|
37
|
-
public void checkColumnsRequired()
|
40
|
+
public void checkColumnsRequired()
|
41
|
+
{
|
38
42
|
ConfigSource config = Exec.newConfigSource();
|
39
43
|
|
40
44
|
config.loadConfig(ParquetOutputPlugin.PluginTask.class);
|
41
45
|
}
|
42
46
|
|
43
47
|
@Test
|
44
|
-
public void checkExtraConfigurations()
|
48
|
+
public void checkExtraConfigurations()
|
49
|
+
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException
|
50
|
+
{
|
45
51
|
ConfigSource map = Exec.newConfigSource()
|
46
52
|
.set("foo", "bar");
|
47
53
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OKUNO Akihiro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,6 +50,8 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
53
55
|
- gradle/wrapper/gradle-wrapper.jar
|
54
56
|
- gradle/wrapper/gradle-wrapper.properties
|
55
57
|
- gradlew
|
@@ -83,7 +85,7 @@ files:
|
|
83
85
|
- classpath/curator-client-2.7.1.jar
|
84
86
|
- classpath/curator-framework-2.7.1.jar
|
85
87
|
- classpath/curator-recipes-2.7.1.jar
|
86
|
-
- classpath/embulk-output-parquet-0.4.0.jar
|
88
|
+
- classpath/embulk-output-parquet-0.5.0.jar
|
87
89
|
- classpath/gson-2.2.4.jar
|
88
90
|
- classpath/hadoop-annotations-2.7.1.jar
|
89
91
|
- classpath/hadoop-auth-2.7.1.jar
|
@@ -121,7 +123,7 @@ files:
|
|
121
123
|
- classpath/jetty-6.1.26.jar
|
122
124
|
- classpath/jetty-util-6.1.26.jar
|
123
125
|
- classpath/jline-0.9.94.jar
|
124
|
-
- classpath/joda-time-2.9.
|
126
|
+
- classpath/joda-time-2.9.9.jar
|
125
127
|
- classpath/jsch-0.1.42.jar
|
126
128
|
- classpath/jsp-api-2.1.jar
|
127
129
|
- classpath/jsr305-3.0.0.jar
|