embulk-output-parquet 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/build.gradle +22 -1
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -1
- data/src/main/java/org/embulk/output/EmbulkWriteSupport.java +45 -21
- data/src/main/java/org/embulk/output/EmbulkWriterBuilder.java +9 -4
- data/src/main/java/org/embulk/output/ParquetOutputPlugin.java +44 -23
- data/src/test/java/org/embulk/output/ParquetOutputPluginTest.java +12 -6
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1ef34fa1ab2ea085e926b70700d4bff09e7bb16
|
4
|
+
data.tar.gz: d2d9df28c5ed603995193552104466da98d400eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a48e6ac6438c1cd56bf431b69ea8e980a54bff54290299d2da7a733c8defe4746230ba7afd2c5446b2d2ce8d42aa7cf74b6ca95c6dd8473cfefa278b891813f
|
7
|
+
data.tar.gz: adcfe86af5337ab4f41b2eb78ae84a2e20f71d40ad63652029d83cb50368ba40139f59f9f38a1bd8a7e7425c6838f96e39f733bbda6230ca9e37c1ae17b09f80
|
data/README.md
CHANGED
@@ -24,14 +24,21 @@ http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
|
|
24
24
|
|
25
25
|
## Example
|
26
26
|
|
27
|
+
```yaml
|
28
|
+
out:
|
29
|
+
type: parquet
|
30
|
+
path_prefix: file:///data/output
|
31
|
+
```
|
32
|
+
|
33
|
+
### How to write parquet files into S3
|
34
|
+
|
27
35
|
```yaml
|
28
36
|
out:
|
29
37
|
type: parquet
|
30
38
|
path_prefix: s3a://bucket/keys
|
31
|
-
|
39
|
+
extra_configurations:
|
32
40
|
fs.s3a.access.key: 'your_access_key'
|
33
41
|
fs.s3a.secret.key: 'your_secret_access_key'
|
34
|
-
|
35
42
|
```
|
36
43
|
|
37
44
|
## Build
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
}
|
6
7
|
import com.github.jrubygradle.JRubyExec
|
7
8
|
repositories {
|
@@ -13,7 +14,11 @@ configurations {
|
|
13
14
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.4.0"
|
17
|
+
version = "0.5.0"
|
18
|
+
|
19
|
+
sourceCompatibility = 1.7
|
20
|
+
|
21
|
+
targetCompatibility = 1.7
|
17
22
|
|
18
23
|
dependencies {
|
19
24
|
compile "org.embulk:embulk-core:0.7.10"
|
@@ -36,6 +41,22 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
36
41
|
}
|
37
42
|
clean { delete 'classpath' }
|
38
43
|
|
44
|
+
checkstyle {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
46
|
+
toolVersion = '6.14.1'
|
47
|
+
}
|
48
|
+
checkstyleMain {
|
49
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
50
|
+
ignoreFailures = true
|
51
|
+
}
|
52
|
+
checkstyleTest {
|
53
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
54
|
+
ignoreFailures = true
|
55
|
+
}
|
56
|
+
task checkstyle(type: Checkstyle) {
|
57
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
58
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
59
|
+
}
|
39
60
|
task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
|
40
61
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
41
62
|
script "build/gemspec"
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
@@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
|
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.7-bin.zip
|
@@ -20,19 +20,23 @@ import java.util.HashMap;
|
|
20
20
|
import java.util.List;
|
21
21
|
import java.util.Map;
|
22
22
|
|
23
|
-
public class EmbulkWriteSupport
|
23
|
+
public class EmbulkWriteSupport
|
24
|
+
extends WriteSupport<PageReader>
|
25
|
+
{
|
24
26
|
final Schema schema;
|
25
27
|
RecordConsumer consumer;
|
26
28
|
WriteContext writeContext;
|
27
29
|
TimestampFormatter[] timestampFormatters;
|
28
30
|
|
29
|
-
public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
|
31
|
+
public EmbulkWriteSupport(Schema schema, TimestampFormatter[] timestampFormatters)
|
32
|
+
{
|
30
33
|
this.schema = schema;
|
31
34
|
this.timestampFormatters = timestampFormatters;
|
32
35
|
}
|
33
36
|
|
34
37
|
@Override
|
35
|
-
public WriteContext init(Configuration configuration)
|
38
|
+
public WriteContext init(Configuration configuration)
|
39
|
+
{
|
36
40
|
if (writeContext == null) {
|
37
41
|
init();
|
38
42
|
}
|
@@ -40,12 +44,14 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
40
44
|
}
|
41
45
|
|
42
46
|
@Override
|
43
|
-
public void prepareForWrite(RecordConsumer recordConsumer)
|
47
|
+
public void prepareForWrite(RecordConsumer recordConsumer)
|
48
|
+
{
|
44
49
|
this.consumer = recordConsumer;
|
45
50
|
}
|
46
51
|
|
47
52
|
@Override
|
48
|
-
public void write(PageReader record)
|
53
|
+
public void write(PageReader record)
|
54
|
+
{
|
49
55
|
final ColumnVisitor visitor = new ParquetColumnVisitor(record, consumer);
|
50
56
|
consumer.startMessage();
|
51
57
|
for (Column c : schema.getColumns()) {
|
@@ -58,58 +64,68 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
58
64
|
consumer.endMessage();
|
59
65
|
}
|
60
66
|
|
61
|
-
private void init()
|
67
|
+
private void init()
|
68
|
+
{
|
62
69
|
MessageType messageType = convertSchema(schema);
|
63
70
|
Map<String, String> metadata = new HashMap<>();
|
64
71
|
writeContext = new WriteContext(messageType, metadata);
|
65
72
|
}
|
66
73
|
|
67
|
-
private MessageType convertSchema(Schema schema)
|
74
|
+
private MessageType convertSchema(Schema schema)
|
75
|
+
{
|
68
76
|
SchemaConvertColumnVisitor visitor = new SchemaConvertColumnVisitor();
|
69
77
|
schema.visitColumns(visitor);
|
70
78
|
String messageName = "embulk";
|
71
79
|
return new MessageType(messageName, visitor.getConvertedFields());
|
72
80
|
}
|
73
81
|
|
74
|
-
class ParquetColumnVisitor
|
82
|
+
class ParquetColumnVisitor
|
83
|
+
implements ColumnVisitor
|
84
|
+
{
|
75
85
|
final PageReader record;
|
76
86
|
final RecordConsumer consumer;
|
77
87
|
|
78
|
-
public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
|
88
|
+
public ParquetColumnVisitor(PageReader record, RecordConsumer consumer)
|
89
|
+
{
|
79
90
|
this.record = record;
|
80
91
|
this.consumer = consumer;
|
81
92
|
}
|
82
93
|
|
83
94
|
@Override
|
84
|
-
public void booleanColumn(Column column)
|
95
|
+
public void booleanColumn(Column column)
|
96
|
+
{
|
85
97
|
if (!record.isNull(column)) {
|
86
98
|
consumer.addBoolean(record.getBoolean(column));
|
87
99
|
}
|
88
100
|
}
|
89
101
|
|
90
102
|
@Override
|
91
|
-
public void longColumn(Column column)
|
103
|
+
public void longColumn(Column column)
|
104
|
+
{
|
92
105
|
if (!record.isNull(column)) {
|
93
106
|
consumer.addLong(record.getLong(column));
|
94
107
|
}
|
95
108
|
}
|
96
109
|
|
97
110
|
@Override
|
98
|
-
public void doubleColumn(Column column)
|
111
|
+
public void doubleColumn(Column column)
|
112
|
+
{
|
99
113
|
if (!record.isNull(column)) {
|
100
114
|
consumer.addDouble(record.getDouble(column));
|
101
115
|
}
|
102
116
|
}
|
103
117
|
|
104
118
|
@Override
|
105
|
-
public void stringColumn(Column column)
|
119
|
+
public void stringColumn(Column column)
|
120
|
+
{
|
106
121
|
if (!record.isNull(column)) {
|
107
122
|
consumer.addBinary(Binary.fromString(record.getString(column)));
|
108
123
|
}
|
109
124
|
}
|
110
125
|
|
111
126
|
@Override
|
112
|
-
public void timestampColumn(Column column)
|
127
|
+
public void timestampColumn(Column column)
|
128
|
+
{
|
113
129
|
if (!record.isNull(column)) {
|
114
130
|
Timestamp t = record.getTimestamp(column);
|
115
131
|
String formatted = timestampFormatters[column.getIndex()].format(t);
|
@@ -118,36 +134,44 @@ public class EmbulkWriteSupport extends WriteSupport<PageReader> {
|
|
118
134
|
}
|
119
135
|
}
|
120
136
|
|
121
|
-
class SchemaConvertColumnVisitor
|
137
|
+
class SchemaConvertColumnVisitor
|
138
|
+
implements ColumnVisitor
|
139
|
+
{
|
122
140
|
List<Type> fields = new ArrayList<>();
|
123
141
|
|
124
142
|
@Override
|
125
|
-
public void booleanColumn(Column column)
|
143
|
+
public void booleanColumn(Column column)
|
144
|
+
{
|
126
145
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName()));
|
127
146
|
}
|
128
147
|
|
129
148
|
@Override
|
130
|
-
public void longColumn(Column column)
|
149
|
+
public void longColumn(Column column)
|
150
|
+
{
|
131
151
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName()));
|
132
152
|
}
|
133
153
|
|
134
154
|
@Override
|
135
|
-
public void doubleColumn(Column column)
|
155
|
+
public void doubleColumn(Column column)
|
156
|
+
{
|
136
157
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName()));
|
137
158
|
}
|
138
159
|
|
139
160
|
@Override
|
140
|
-
public void stringColumn(Column column)
|
161
|
+
public void stringColumn(Column column)
|
162
|
+
{
|
141
163
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
|
142
164
|
}
|
143
165
|
|
144
166
|
@Override
|
145
|
-
public void timestampColumn(Column column)
|
167
|
+
public void timestampColumn(Column column)
|
168
|
+
{
|
146
169
|
// formatted as string
|
147
170
|
fields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName()));
|
148
171
|
}
|
149
172
|
|
150
|
-
public List<Type> getConvertedFields()
|
173
|
+
public List<Type> getConvertedFields()
|
174
|
+
{
|
151
175
|
return fields;
|
152
176
|
}
|
153
177
|
}
|
@@ -8,23 +8,28 @@ import org.embulk.spi.PageReader;
|
|
8
8
|
import org.embulk.spi.Schema;
|
9
9
|
import org.embulk.spi.time.TimestampFormatter;
|
10
10
|
|
11
|
-
public class EmbulkWriterBuilder
|
11
|
+
public class EmbulkWriterBuilder
|
12
|
+
extends ParquetWriter.Builder<PageReader, EmbulkWriterBuilder>
|
13
|
+
{
|
12
14
|
final Schema schema;
|
13
15
|
final TimestampFormatter[] timestampFormatters;
|
14
16
|
|
15
|
-
public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
|
17
|
+
public EmbulkWriterBuilder(Path file, Schema schema, TimestampFormatter[] timestampFormatters)
|
18
|
+
{
|
16
19
|
super(file);
|
17
20
|
this.schema = schema;
|
18
21
|
this.timestampFormatters = timestampFormatters;
|
19
22
|
}
|
20
23
|
|
21
24
|
@Override
|
22
|
-
protected EmbulkWriterBuilder self()
|
25
|
+
protected EmbulkWriterBuilder self()
|
26
|
+
{
|
23
27
|
return this;
|
24
28
|
}
|
25
29
|
|
26
30
|
@Override
|
27
|
-
protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
|
31
|
+
protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
|
32
|
+
{
|
28
33
|
return new EmbulkWriteSupport(schema, timestampFormatters);
|
29
34
|
}
|
30
35
|
}
|
@@ -30,9 +30,11 @@ import java.util.Map;
|
|
30
30
|
|
31
31
|
@SuppressWarnings("unused")
|
32
32
|
public class ParquetOutputPlugin
|
33
|
-
implements OutputPlugin
|
33
|
+
implements OutputPlugin
|
34
|
+
{
|
34
35
|
public interface PluginTask
|
35
|
-
extends Task, TimestampFormatter.Task
|
36
|
+
extends Task, TimestampFormatter.Task
|
37
|
+
{
|
36
38
|
@Config("path_prefix")
|
37
39
|
String getPathPrefix();
|
38
40
|
|
@@ -72,12 +74,14 @@ public class ParquetOutputPlugin
|
|
72
74
|
}
|
73
75
|
|
74
76
|
public interface TimestampColumnOption
|
75
|
-
extends Task, TimestampFormatter.TimestampColumnOption
|
77
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
78
|
+
{
|
76
79
|
}
|
77
80
|
|
78
81
|
public ConfigDiff transaction(ConfigSource config,
|
79
|
-
|
80
|
-
|
82
|
+
Schema schema, int processorCount,
|
83
|
+
OutputPlugin.Control control)
|
84
|
+
{
|
81
85
|
PluginTask task = config.loadConfig(PluginTask.class);
|
82
86
|
|
83
87
|
//TODO
|
@@ -87,18 +91,21 @@ public class ParquetOutputPlugin
|
|
87
91
|
}
|
88
92
|
|
89
93
|
public ConfigDiff resume(TaskSource taskSource,
|
90
|
-
|
91
|
-
|
94
|
+
Schema schema, int processorCount,
|
95
|
+
OutputPlugin.Control control)
|
96
|
+
{
|
92
97
|
throw new UnsupportedOperationException("parquet output plugin does not support resuming");
|
93
98
|
}
|
94
99
|
|
95
100
|
public void cleanup(TaskSource taskSource,
|
96
|
-
|
97
|
-
|
101
|
+
Schema schema, int processorCount,
|
102
|
+
List<TaskReport> successTaskReports)
|
103
|
+
{
|
98
104
|
//TODO
|
99
105
|
}
|
100
106
|
|
101
|
-
public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
|
107
|
+
public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int processorIndex)
|
108
|
+
{
|
102
109
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
103
110
|
|
104
111
|
final PageReader reader = new PageReader(schema);
|
@@ -107,14 +114,16 @@ public class ParquetOutputPlugin
|
|
107
114
|
return new ParquetTransactionalPageOutput(reader, writer);
|
108
115
|
}
|
109
116
|
|
110
|
-
private String buildPath(PluginTask task, int processorIndex)
|
117
|
+
private String buildPath(PluginTask task, int processorIndex)
|
118
|
+
{
|
111
119
|
final String pathPrefix = task.getPathPrefix();
|
112
120
|
final String pathSuffix = task.getFileNameExtension();
|
113
121
|
final String sequenceFormat = task.getSequenceFormat();
|
114
122
|
return pathPrefix + String.format(sequenceFormat, processorIndex) + pathSuffix;
|
115
123
|
}
|
116
124
|
|
117
|
-
private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
|
125
|
+
private ParquetWriter<PageReader> createWriter(PluginTask task, Schema schema, int processorIndex)
|
126
|
+
{
|
118
127
|
final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
119
128
|
|
120
129
|
final Path path = new Path(buildPath(task, processorIndex));
|
@@ -138,13 +147,15 @@ public class ParquetOutputPlugin
|
|
138
147
|
}
|
139
148
|
|
140
149
|
writer = builder.build();
|
141
|
-
}
|
150
|
+
}
|
151
|
+
catch (IOException e) {
|
142
152
|
Throwables.propagate(e);
|
143
153
|
}
|
144
154
|
return writer;
|
145
155
|
}
|
146
156
|
|
147
|
-
private Configuration createConfiguration(Map<String, String> extra)
|
157
|
+
private Configuration createConfiguration(Map<String, String> extra)
|
158
|
+
{
|
148
159
|
Configuration conf = new Configuration();
|
149
160
|
|
150
161
|
// Default values
|
@@ -161,49 +172,59 @@ public class ParquetOutputPlugin
|
|
161
172
|
return conf;
|
162
173
|
}
|
163
174
|
|
164
|
-
class ParquetTransactionalPageOutput
|
175
|
+
class ParquetTransactionalPageOutput
|
176
|
+
implements TransactionalPageOutput
|
177
|
+
{
|
165
178
|
private PageReader reader;
|
166
179
|
private ParquetWriter<PageReader> writer;
|
167
180
|
|
168
|
-
public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
|
181
|
+
public ParquetTransactionalPageOutput(PageReader reader, ParquetWriter<PageReader> writer)
|
182
|
+
{
|
169
183
|
this.reader = reader;
|
170
184
|
this.writer = writer;
|
171
185
|
}
|
172
186
|
|
173
187
|
@Override
|
174
|
-
public void add(Page page)
|
188
|
+
public void add(Page page)
|
189
|
+
{
|
175
190
|
try {
|
176
191
|
reader.setPage(page);
|
177
192
|
while (reader.nextRecord()) {
|
178
193
|
writer.write(reader);
|
179
194
|
}
|
180
|
-
}
|
195
|
+
}
|
196
|
+
catch (IOException e) {
|
181
197
|
Throwables.propagate(e);
|
182
198
|
}
|
183
199
|
}
|
184
200
|
|
185
201
|
@Override
|
186
|
-
public void finish()
|
202
|
+
public void finish()
|
203
|
+
{
|
187
204
|
try {
|
188
205
|
writer.close();
|
189
206
|
writer = null;
|
190
|
-
}
|
207
|
+
}
|
208
|
+
catch (IOException e) {
|
191
209
|
Throwables.propagate(e);
|
192
210
|
}
|
193
211
|
}
|
194
212
|
|
195
213
|
@Override
|
196
|
-
public void close()
|
214
|
+
public void close()
|
215
|
+
{
|
197
216
|
//TODO
|
198
217
|
}
|
199
218
|
|
200
219
|
@Override
|
201
|
-
public void abort()
|
220
|
+
public void abort()
|
221
|
+
{
|
202
222
|
//TODO
|
203
223
|
}
|
204
224
|
|
205
225
|
@Override
|
206
|
-
public TaskReport commit()
|
226
|
+
public TaskReport commit()
|
227
|
+
{
|
207
228
|
return Exec.newTaskReport();
|
208
229
|
//TODO
|
209
230
|
}
|
@@ -10,17 +10,20 @@ import org.junit.Test;
|
|
10
10
|
|
11
11
|
import java.lang.reflect.InvocationTargetException;
|
12
12
|
import java.lang.reflect.Method;
|
13
|
-
import java.util.HashMap;
|
14
13
|
import java.util.Map;
|
15
14
|
|
16
|
-
import static org.junit.Assert.*;
|
15
|
+
import static org.junit.Assert.assertEquals;
|
16
|
+
import static org.junit.Assert.assertFalse;
|
17
|
+
import static org.junit.Assert.assertTrue;
|
17
18
|
|
18
|
-
public class ParquetOutputPluginTest
|
19
|
+
public class ParquetOutputPluginTest
|
20
|
+
{
|
19
21
|
@Rule
|
20
22
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
21
23
|
|
22
24
|
@Test
|
23
|
-
public void checkDefaultValues()
|
25
|
+
public void checkDefaultValues()
|
26
|
+
{
|
24
27
|
ConfigSource config = Exec.newConfigSource()
|
25
28
|
.set("path_prefix", "test");
|
26
29
|
|
@@ -34,14 +37,17 @@ public class ParquetOutputPluginTest {
|
|
34
37
|
}
|
35
38
|
|
36
39
|
@Test(expected = ConfigException.class)
|
37
|
-
public void checkColumnsRequired()
|
40
|
+
public void checkColumnsRequired()
|
41
|
+
{
|
38
42
|
ConfigSource config = Exec.newConfigSource();
|
39
43
|
|
40
44
|
config.loadConfig(ParquetOutputPlugin.PluginTask.class);
|
41
45
|
}
|
42
46
|
|
43
47
|
@Test
|
44
|
-
public void checkExtraConfigurations()
|
48
|
+
public void checkExtraConfigurations()
|
49
|
+
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException
|
50
|
+
{
|
45
51
|
ConfigSource map = Exec.newConfigSource()
|
46
52
|
.set("foo", "bar");
|
47
53
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OKUNO Akihiro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,6 +50,8 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
53
55
|
- gradle/wrapper/gradle-wrapper.jar
|
54
56
|
- gradle/wrapper/gradle-wrapper.properties
|
55
57
|
- gradlew
|
@@ -83,7 +85,7 @@ files:
|
|
83
85
|
- classpath/curator-client-2.7.1.jar
|
84
86
|
- classpath/curator-framework-2.7.1.jar
|
85
87
|
- classpath/curator-recipes-2.7.1.jar
|
86
|
-
- classpath/embulk-output-parquet-0.4.0.jar
|
88
|
+
- classpath/embulk-output-parquet-0.5.0.jar
|
87
89
|
- classpath/gson-2.2.4.jar
|
88
90
|
- classpath/hadoop-annotations-2.7.1.jar
|
89
91
|
- classpath/hadoop-auth-2.7.1.jar
|
@@ -121,7 +123,7 @@ files:
|
|
121
123
|
- classpath/jetty-6.1.26.jar
|
122
124
|
- classpath/jetty-util-6.1.26.jar
|
123
125
|
- classpath/jline-0.9.94.jar
|
124
|
-
- classpath/joda-time-2.9.
|
126
|
+
- classpath/joda-time-2.9.9.jar
|
125
127
|
- classpath/jsch-0.1.42.jar
|
126
128
|
- classpath/jsp-api-2.1.jar
|
127
129
|
- classpath/jsr305-3.0.0.jar
|