embulk-output-s3_parquet 0.0.2 → 0.0.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -0
- data/build.gradle +12 -13
- data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala +178 -0
- data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala +166 -144
- data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala +43 -35
- data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala +47 -29
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala +22 -14
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala +104 -95
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala +34 -26
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala +39 -31
- data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala +40 -32
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala +57 -37
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala +26 -19
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala +128 -94
- data/src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala +113 -104
- metadata +18 -16
- data/.scalafmt.conf +0 -9
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala:

```diff
@@ -11,39 +11,47 @@ import org.embulk.output.s3_parquet.aws.AwsEndpointConfiguration.Task
 
 import scala.util.Try
 
-object AwsEndpointConfiguration {
-
-  def getEndpoint: Optional[String]
-
-  def apply(task: Task): AwsEndpointConfiguration = new AwsEndpointConfiguration(task)
-}
-
-class AwsEndpointConfiguration(task: Task) {
-
-  def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit = {
-    if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
-      val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
-      builder.setEndpointConfiguration(ec)
-    }
-  }
-}
+object AwsEndpointConfiguration
+{
+    trait Task
+    {
+        @Config("endpoint")
+        @ConfigDefault("null")
+        def getEndpoint: Optional[String]
+
+        @Config("region")
+        @ConfigDefault("null")
+        def getRegion: Optional[String]
+    }
+
+    def apply(task: Task): AwsEndpointConfiguration =
+    {
+        new AwsEndpointConfiguration(task)
+    }
+}
+
+class AwsEndpointConfiguration(task: Task)
+{
+    def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit =
+    {
+        if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
+            builder.setEndpointConfiguration(ec)
+        }
+        else if (task.getRegion.isPresent && !task.getEndpoint.isPresent) {
+            builder.setRegion(task.getRegion.get)
+        }
+        else if (!task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val r: String = Try(new DefaultAwsRegionProviderChain().getRegion).getOrElse(Regions.DEFAULT_REGION.getName)
+            val e: String = task.getEndpoint.get
+            val ec = new EndpointConfiguration(e, r)
+            builder.setEndpointConfiguration(ec)
+        }
+    }
+}
```
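The rewritten `configureAwsClientBuilder` adds a three-way fallback: endpoint plus region, region alone, or endpoint alone with the region resolved from the default provider chain (defaulting to `Regions.DEFAULT_REGION`). Below is a minimal stand-alone sketch of that resolution order in plain Scala; the names (`EndpointResolutionSketch`, `ClientSetting`) are illustrative stand-ins, not part of the plugin or the AWS SDK.

```scala
object EndpointResolutionSketch {

  sealed trait ClientSetting
  case class EndpointWithRegion(endpoint: String, region: String) extends ClientSetting // builder.setEndpointConfiguration
  case class RegionOnly(region: String) extends ClientSetting                           // builder.setRegion
  case object SdkDefaults extends ClientSetting                                         // builder left untouched

  def resolve(endpoint: Option[String],
              region: Option[String],
              fallbackRegion: => String): ClientSetting =
    (endpoint, region) match {
      case (Some(e), Some(r)) => EndpointWithRegion(e, r)              // both given: explicit endpoint configuration
      case (None, Some(r))    => RegionOnly(r)                         // region only
      case (Some(e), None)    => EndpointWithRegion(e, fallbackRegion) // endpoint only: region from provider chain or default
      case (None, None)       => SdkDefaults
    }

  def main(args: Array[String]): Unit =
    // prints EndpointWithRegion(https://s3.example.com,us-east-1)
    println(resolve(Some("https://s3.example.com"), None, "us-east-1"))
}
```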
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala:

```diff
@@ -7,50 +7,58 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder
 
 import org.embulk.config.{Config, ConfigDefault}
 import org.embulk.output.s3_parquet.aws.AwsS3Configuration.Task
 
+
 /*
  * These are advanced settings, so write no documentation.
  */
-object AwsS3Configuration
+object AwsS3Configuration
+{
+    trait Task
+    {
+        @Config("accelerate_mode_enabled")
+        @ConfigDefault("null")
+        def getAccelerateModeEnabled: Optional[Boolean]
+
+        @Config("chunked_encoding_disabled")
+        @ConfigDefault("null")
+        def getChunkedEncodingDisabled: Optional[Boolean]
+
+        @Config("dualstack_enabled")
+        @ConfigDefault("null")
+        def getDualstackEnabled: Optional[Boolean]
+
+        @Config("force_global_bucket_access_enabled")
+        @ConfigDefault("null")
+        def getForceGlobalBucketAccessEnabled: Optional[Boolean]
+
+        @Config("path_style_access_enabled")
+        @ConfigDefault("null")
+        def getPathStyleAccessEnabled: Optional[Boolean]
+
+        @Config("payload_signing_enabled")
+        @ConfigDefault("null")
+        def getPayloadSigningEnabled: Optional[Boolean]
+    }
+
+    def apply(task: Task): AwsS3Configuration =
+    {
+        new AwsS3Configuration(task)
+    }
 }
 
-class AwsS3Configuration(task: Task)
+class AwsS3Configuration(task: Task)
+{
+    def configureAmazonS3ClientBuilder(builder: AmazonS3ClientBuilder): Unit =
+    {
+        task.getAccelerateModeEnabled.ifPresent(v => builder.setAccelerateModeEnabled(v))
+        task.getChunkedEncodingDisabled.ifPresent(v => builder.setChunkedEncodingDisabled(v))
+        task.getDualstackEnabled.ifPresent(v => builder.setDualstackEnabled(v))
+        task.getForceGlobalBucketAccessEnabled.ifPresent(v => builder.setForceGlobalBucketAccessEnabled(v))
+        task.getPathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
+        task.getPayloadSigningEnabled.ifPresent(v => builder.setPayloadSigningEnabled(v))
+    }
 
 }
```
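`configureAmazonS3ClientBuilder` forwards each option to the SDK builder only when the user actually set it, so unset options keep the SDK defaults. A small sketch of that `Optional.ifPresent` forwarding pattern, using a hypothetical `FakeS3Builder` in place of `AmazonS3ClientBuilder`:

```scala
import java.util.Optional

object OptionalForwardingSketch {

  // Stand-in for the AWS SDK builder; only logs what would be configured.
  class FakeS3Builder {
    def setAccelerateModeEnabled(v: Boolean): Unit = println(s"accelerateModeEnabled = $v")
    def setDualstackEnabled(v: Boolean): Unit = println(s"dualstackEnabled = $v")
  }

  def configure(builder: FakeS3Builder,
                accelerate: Optional[Boolean],
                dualstack: Optional[Boolean]): Unit = {
    // Each flag is applied only when present; absent flags keep the builder defaults.
    accelerate.ifPresent(v => builder.setAccelerateModeEnabled(v))
    dualstack.ifPresent(v => builder.setDualstackEnabled(v))
  }

  def main(args: Array[String]): Unit =
    // prints only "accelerateModeEnabled = true"
    configure(new FakeS3Builder, Optional.of(true), Optional.empty[Boolean]())
}
```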
data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala:

```diff
@@ -7,50 +7,58 @@ import com.amazonaws.{ClientConfiguration, Protocol}
 
 import org.embulk.config.{Config, ConfigDefault, ConfigException}
 import org.embulk.output.s3_parquet.aws.HttpProxy.Task
 
-object HttpProxy {
-
-  def getHost: Optional[String]
-
-class HttpProxy(task: Task)
+object HttpProxy
+{
+    trait Task
+    {
+        @Config("host")
+        @ConfigDefault("null")
+        def getHost: Optional[String]
+
+        @Config("port")
+        @ConfigDefault("null")
+        def getPort: Optional[Int]
+
+        @Config("protocol")
+        @ConfigDefault("\"https\"")
+        def getProtocol: String
+
+        @Config("user")
+        @ConfigDefault("null")
+        def getUser: Optional[String]
+
+        @Config("password")
+        @ConfigDefault("null")
+        def getPassword: Optional[String]
+    }
+
+    def apply(task: Task): HttpProxy =
+    {
+        new HttpProxy(task)
+    }
+}
+
+class HttpProxy(task: Task)
+{
+    def configureClientConfiguration(cc: ClientConfiguration): Unit =
+    {
+        task.getHost.ifPresent(v => cc.setProxyHost(v))
+        task.getPort.ifPresent(v => cc.setProxyPort(v))
+
+        Protocol.values.find(p => p.name().equals(task.getProtocol)) match {
+            case Some(v) =>
+                cc.setProtocol(v)
+            case None =>
+                throw new ConfigException(s"'${task.getProtocol}' is unsupported: `protocol` must be one of [${Protocol.values.map(v => s"'$v'").mkString(", ")}].")
+        }
+
+        task.getUser.ifPresent(v => cc.setProxyUsername(v))
+        task.getPassword.ifPresent(v => cc.setProxyPassword(v))
+    }
 }
```
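The new protocol check rejects any value that does not match one of the SDK's `Protocol` enum entries and lists the allowed set in the error message. A self-contained sketch of the same lookup-or-fail validation; the `Protocol` type below is a stand-in for `com.amazonaws.Protocol`, and `IllegalArgumentException` stands in for Embulk's `ConfigException`:

```scala
object ProtocolValidationSketch {

  sealed abstract class Protocol(val name: String)
  case object HTTP extends Protocol("http")
  case object HTTPS extends Protocol("https")
  val values: Seq[Protocol] = Seq(HTTP, HTTPS)

  // Look the configured string up among the known values, or fail with a
  // message that enumerates what would have been accepted.
  def parse(s: String): Protocol =
    values.find(_.name == s).getOrElse(
      throw new IllegalArgumentException(
        s"'$s' is unsupported: `protocol` must be one of [${values.map(v => s"'${v.name}'").mkString(", ")}]."))

  def main(args: Array[String]): Unit = {
    println(parse("https")) // HTTPS
    // parse("ftp") would throw with the same message shape as the plugin's ConfigException
  }
}
```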
data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala:

```diff
@@ -6,54 +6,74 @@ import org.apache.parquet.schema.{MessageType, OriginalType, PrimitiveType, Type
 
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
 import org.embulk.spi.{Column, ColumnVisitor, Schema}
 
-object EmbulkMessageType {
-
-  case class Builder(name: String = "embulk",
-                     schema: Schema = Schema.builder().build()) {
-
-    def withName(name: String): Builder = Builder(name = name, schema = schema)
-
-    def withSchema(schema: Schema): Builder = Builder(name = name, schema = schema)
-
-    def build(): MessageType = {
-      val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
-      schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
-      new MessageType("embulk", builder.build())
-    }
-
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
-    }
-
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
-
-    override def timestampColumn(column: Column): Unit = {
-      // TODO: Support OriginalType.TIME* ?
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
-  }
+object EmbulkMessageType
+{
+    def builder(): Builder =
+    {
+        Builder()
+    }
+
+    case class Builder(name: String = "embulk",
+                       schema: Schema = Schema.builder().build())
+    {
+        def withName(name: String): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
+
+        def withSchema(schema: Schema): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
+
+        def build(): MessageType =
+        {
+            val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
+            schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
+            new MessageType("embulk", builder.build())
+        }
+    }
+
+    private case class EmbulkMessageTypeColumnVisitor(builder: ImmutableList.Builder[Type])
+        extends ColumnVisitor
+    {
+        override def booleanColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName))
+        }
+
+        override def longColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName))
+        }
+
+        override def doubleColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
+        }
+
+        override def stringColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+
+        override def timestampColumn(column: Column): Unit =
+        {
+            // TODO: Support OriginalType.TIME* ?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+
+        override def jsonColumn(column: Column): Unit =
+        {
+            // TODO: does this work?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+    }
 
 }
```
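The visitor maps each Embulk column type to one optional Parquet primitive: boolean to BOOLEAN, long to INT64, double to DOUBLE, and string, timestamp, and json all to BINARY annotated as UTF8 (hence the two TODOs above). A compact model of that mapping, using simplified stand-in types rather than the parquet-mr or Embulk SPI classes:

```scala
object SchemaMappingSketch {

  sealed trait EmbulkType
  case object BooleanType   extends EmbulkType
  case object LongType      extends EmbulkType
  case object DoubleType    extends EmbulkType
  case object StringType    extends EmbulkType
  case object TimestampType extends EmbulkType
  case object JsonType      extends EmbulkType

  // Every column becomes an OPTIONAL field; timestamp and json are stored as
  // UTF-8 strings in 0.0.3, just like plain strings.
  def toParquetPrimitive(t: EmbulkType): String = t match {
    case BooleanType                           => "optional boolean"
    case LongType                              => "optional int64"
    case DoubleType                            => "optional double"
    case StringType | TimestampType | JsonType => "optional binary (UTF8)"
  }

  def main(args: Array[String]): Unit =
    Seq(BooleanType, LongType, TimestampType, JsonType)
      .map(toParquetPrimitive)
      .foreach(println)
}
```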
data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala:

```diff
@@ -9,25 +9,32 @@ import org.apache.parquet.schema.MessageType
 
 import org.embulk.spi.{PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-import scala.
+import scala.jdk.CollectionConverters._
 
-private[parquet] case class ParquetFileWriteSupport(schema: Schema,
-                                                    timestampFormatters: Seq[TimestampFormatter])
-  extends WriteSupport[PageReader] {
-
-  private var currentParquetFileWriter: ParquetFileWriter = _
-
-  override def init(configuration: Configuration): WriteContext = {
-    val messageType: MessageType = EmbulkMessageType.builder()
-      .withSchema(schema)
-      .build()
-    val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
-    new WriteContext(messageType, metadata.asJava)
-  }
+private[parquet] case class ParquetFileWriteSupport(schema: Schema,
+                                                    timestampFormatters: Seq[TimestampFormatter])
+    extends WriteSupport[PageReader]
+{
+
+    private var currentParquetFileWriter: ParquetFileWriter = _
+
+    override def init(configuration: Configuration): WriteContext =
+    {
+        val messageType: MessageType = EmbulkMessageType.builder()
+            .withSchema(schema)
+            .build()
+        val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
+        new WriteContext(messageType, metadata.asJava)
+    }
+
+    override def prepareForWrite(recordConsumer: RecordConsumer): Unit =
+    {
+        currentParquetFileWriter = ParquetFileWriter(recordConsumer, schema, timestampFormatters)
+    }
+
+    override def write(record: PageReader): Unit =
+    {
+        currentParquetFileWriter.write(record)
+    }
 }
```
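`ParquetFileWriteSupport` follows parquet-mr's `WriteSupport` lifecycle: `init` supplies the schema once, `prepareForWrite` hands over the per-file `RecordConsumer`, and `write` is called once per record and delegates to the current `ParquetFileWriter`. A toy model of that call order, with stand-in types rather than the real parquet-mr interfaces:

```scala
object WriteSupportLifecycleSketch {

  // Stand-in for parquet-mr's RecordConsumer: just receives each record.
  trait RecordConsumer { def accept(record: String): Unit }

  class FileWriteSupport {
    private var writer: RecordConsumer = _

    // Called once: returns the schema (the real init returns a WriteContext).
    def init(): String = "message embulk { ... }"

    // Called once per file: keep the consumer for subsequent writes.
    def prepareForWrite(rc: RecordConsumer): Unit = writer = rc

    // Called once per record: delegate to the current consumer.
    def write(record: String): Unit = writer.accept(record)
  }

  def main(args: Array[String]): Unit = {
    val support = new FileWriteSupport
    println(support.init())
    support.prepareForWrite((r: String) => println(s"wrote: $r"))
    Seq("record-1", "record-2").foreach(support.write)
  }
}
```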
data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala:

```diff
@@ -9,117 +9,151 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer}
 
 import org.embulk.spi.{Column, ColumnVisitor, PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-object ParquetFileWriter {
-
-             timestampFormatters: Seq[TimestampFormatter] = null)
-    extends ParquetWriter.Builder[PageReader, Builder](path) {
-
-  def builder(): Builder =
+object ParquetFileWriter
+{
+    case class Builder(path: Path = null,
+                       schema: Schema = null,
+                       timestampFormatters: Seq[TimestampFormatter] = null)
+        extends ParquetWriter.Builder[PageReader, Builder](path)
+    {
+        def withPath(path: Path): Builder =
+        {
+            copy(path = path)
+        }
+
+        def withPath(pathString: String): Builder =
+        {
+            copy(path = new Path(pathString))
+        }
+
+        def withSchema(schema: Schema): Builder =
+        {
+            copy(schema = schema)
+        }
+
+        def withTimestampFormatters(timestampFormatters: Seq[TimestampFormatter]): Builder =
+        {
+            copy(timestampFormatters = timestampFormatters)
+        }
+
+        override def self(): Builder =
+        {
+            this
+        }
+
+        override def getWriteSupport(conf: Configuration): WriteSupport[PageReader] =
+        {
+            ParquetFileWriteSupport(schema, timestampFormatters)
+        }
+    }
+
+    def builder(): Builder =
+    {
+        Builder()
+    }
 
 }
 
 
 private[parquet] case class ParquetFileWriter(recordConsumer: RecordConsumer,
-
-    schema.visitColumns(new ColumnVisitor() {
-
-      override def booleanColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addBoolean(record.getBoolean(column))
-          })
-        })
-      }
-
-      override def longColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addLong(record.getLong(column))
-          })
-        })
-      }
-
-      override def stringColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            val bin = Binary.fromString(record.getString(column))
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
-
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val t = record.getTimestamp(column)
-            val ft = timestampFormatters(column.getIndex).format(t)
-            val bin = Binary.fromString(ft)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
-
-      override def jsonColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val msgPack = record.getJson(column)
-            val bin = Binary.fromString(msgPack.toJson)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
-
-      private def nullOr(column: Column,
-                         f: => Unit): Unit = {
-        if (!record.isNull(column)) f
-      }
-
-      private def withWriteFieldContext(column: Column,
-                                        f: => Unit): Unit = {
-        recordConsumer.startField(column.getName, column.getIndex)
-        f
-        recordConsumer.endField(column.getName, column.getIndex)
-      }
-
-    })
-
-  }
+                                              schema: Schema,
+                                              timestampFormatters: Seq[TimestampFormatter])
+{
+
+    def write(record: PageReader): Unit =
+    {
+        recordConsumer.startMessage()
+        writeRecord(record)
+        recordConsumer.endMessage()
+    }
+
+    private def writeRecord(record: PageReader): Unit =
+    {
+
+        schema.visitColumns(new ColumnVisitor()
+        {
+
+            override def booleanColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addBoolean(record.getBoolean(column))
+                    })
+                })
+            }
+
+            override def longColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addLong(record.getLong(column))
+                    })
+                })
+            }
+
+            override def doubleColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addDouble(record.getDouble(column))
+                    })
+                })
+            }
+
+            override def stringColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        val bin = Binary.fromString(record.getString(column))
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            override def timestampColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val t = record.getTimestamp(column)
+                        val ft = timestampFormatters(column.getIndex).format(t)
+                        val bin = Binary.fromString(ft)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            override def jsonColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val msgPack = record.getJson(column)
+                        val bin = Binary.fromString(msgPack.toJson)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            private def nullOr(column: Column,
+                               f: => Unit): Unit =
+            {
+                if (!record.isNull(column)) f
+            }
+
+            private def withWriteFieldContext(column: Column,
+                                              f: => Unit): Unit =
+            {
+                recordConsumer.startField(column.getName, column.getIndex)
+                f
+                recordConsumer.endField(column.getName, column.getIndex)
+            }
+
+        })
+
+    }
 
 }
```