embulk-output-s3_parquet 0.0.2 → 0.0.3
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -0
- data/build.gradle +12 -13
- data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala +178 -0
- data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala +166 -144
- data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala +43 -35
- data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala +47 -29
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala +22 -14
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala +104 -95
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala +34 -26
- data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala +39 -31
- data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala +40 -32
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala +57 -37
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala +26 -19
- data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala +128 -94
- data/src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala +113 -104
- metadata +18 -16
- data/.scalafmt.conf +0 -9
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala
@@ -11,39 +11,47 @@ import org.embulk.output.s3_parquet.aws.AwsEndpointConfiguration.Task
 
 import scala.util.Try
 
-object AwsEndpointConfiguration {
+object AwsEndpointConfiguration
+{
 
-  trait Task {
+    trait Task
+    {
 
-    @Config("endpoint")
-    @ConfigDefault("null")
-    def getEndpoint: Optional[String]
+        @Config("endpoint")
+        @ConfigDefault("null")
+        def getEndpoint: Optional[String]
 
-    @Config("region")
-    @ConfigDefault("null")
-    def getRegion: Optional[String]
+        @Config("region")
+        @ConfigDefault("null")
+        def getRegion: Optional[String]
 
-  }
+    }
 
-  def apply(task: Task): AwsEndpointConfiguration = new AwsEndpointConfiguration(task)
+    def apply(task: Task): AwsEndpointConfiguration =
+    {
+        new AwsEndpointConfiguration(task)
+    }
 }
 
-class AwsEndpointConfiguration(task: Task) {
+class AwsEndpointConfiguration(task: Task)
+{
 
-  def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit = {
-    if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
-      val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
-      builder.setEndpointConfiguration(ec)
-    }
-    else if (task.getRegion.isPresent && !task.getEndpoint.isPresent) {
-      builder.setRegion(task.getRegion.get)
-    }
-    else if (!task.getRegion.isPresent && task.getEndpoint.isPresent) {
-      val r: String = Try(new DefaultAwsRegionProviderChain().getRegion).getOrElse(Regions.DEFAULT_REGION.getName)
-      val e: String = task.getEndpoint.get
-      val ec = new EndpointConfiguration(e, r)
-      builder.setEndpointConfiguration(ec)
-    }
-  }
+    def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit =
+    {
+        if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
+            builder.setEndpointConfiguration(ec)
+        }
+        else if (task.getRegion.isPresent && !task.getEndpoint.isPresent) {
+            builder.setRegion(task.getRegion.get)
+        }
+        else if (!task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val r: String = Try(new DefaultAwsRegionProviderChain().getRegion).getOrElse(Regions.DEFAULT_REGION.getName)
+            val e: String = task.getEndpoint.get
+            val ec = new EndpointConfiguration(e, r)
+            builder.setEndpointConfiguration(ec)
+        }
+    }
 
 }
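The hunk above is almost entirely a brace-style reformat; the endpoint logic itself is unchanged: with both `endpoint` and `region` set, an explicit EndpointConfiguration is used; with only `region`, the region is set directly; with only `endpoint`, a region is resolved from DefaultAwsRegionProviderChain, falling back to Regions.DEFAULT_REGION. A minimal sketch of driving it (EndpointExample, the anonymous Task stub, and the endpoint URL are hypothetical, not part of the plugin):

    import java.util.Optional
    import com.amazonaws.services.s3.AmazonS3ClientBuilder
    import org.embulk.output.s3_parquet.aws.AwsEndpointConfiguration

    object EndpointExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical stub: endpoint set, region left empty.
        val task = new AwsEndpointConfiguration.Task {
          override def getEndpoint: Optional[String] = Optional.of("https://s3.ap-northeast-1.amazonaws.com")
          override def getRegion: Optional[String] = Optional.empty()
        }
        // Hits the third branch: the endpoint is paired with a region
        // resolved from the default provider chain (or the SDK default).
        val builder = AmazonS3ClientBuilder.standard()
        AwsEndpointConfiguration(task).configureAwsClientBuilder(builder)
      }
    }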
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala
@@ -7,50 +7,58 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder
 import org.embulk.config.{Config, ConfigDefault}
 import org.embulk.output.s3_parquet.aws.AwsS3Configuration.Task
 
+
 /*
  * These are advanced settings, so write no documentation.
  */
-object AwsS3Configuration {
-  trait Task {
+object AwsS3Configuration
+{
+    trait Task
+    {
 
-    @Config("accelerate_mode_enabled")
-    @ConfigDefault("null")
-    def getAccelerateModeEnabled: Optional[Boolean]
+        @Config("accelerate_mode_enabled")
+        @ConfigDefault("null")
+        def getAccelerateModeEnabled: Optional[Boolean]
 
-    @Config("chunked_encoding_disabled")
-    @ConfigDefault("null")
-    def getChunkedEncodingDisabled: Optional[Boolean]
+        @Config("chunked_encoding_disabled")
+        @ConfigDefault("null")
+        def getChunkedEncodingDisabled: Optional[Boolean]
 
-    @Config("dualstack_enabled")
-    @ConfigDefault("null")
-    def getDualstackEnabled: Optional[Boolean]
+        @Config("dualstack_enabled")
+        @ConfigDefault("null")
+        def getDualstackEnabled: Optional[Boolean]
 
-    @Config("force_global_bucket_access_enabled")
-    @ConfigDefault("null")
-    def getForceGlobalBucketAccessEnabled: Optional[Boolean]
+        @Config("force_global_bucket_access_enabled")
+        @ConfigDefault("null")
+        def getForceGlobalBucketAccessEnabled: Optional[Boolean]
 
-    @Config("path_style_access_enabled")
-    @ConfigDefault("null")
-    def getPathStyleAccessEnabled: Optional[Boolean]
+        @Config("path_style_access_enabled")
+        @ConfigDefault("null")
+        def getPathStyleAccessEnabled: Optional[Boolean]
 
-    @Config("payload_signing_enabled")
-    @ConfigDefault("null")
-    def getPayloadSigningEnabled: Optional[Boolean]
+        @Config("payload_signing_enabled")
+        @ConfigDefault("null")
+        def getPayloadSigningEnabled: Optional[Boolean]
 
-  }
+    }
 
-  def apply(task: Task): AwsS3Configuration = new AwsS3Configuration(task)
+    def apply(task: Task): AwsS3Configuration =
+    {
+        new AwsS3Configuration(task)
+    }
 }
 
-class AwsS3Configuration(task: Task) {
+class AwsS3Configuration(task: Task)
+{
 
-  def configureAmazonS3ClientBuilder(builder: AmazonS3ClientBuilder): Unit = {
-    task.getAccelerateModeEnabled.ifPresent(v => builder.setAccelerateModeEnabled(v))
-    task.getChunkedEncodingDisabled.ifPresent(v => builder.setChunkedEncodingDisabled(v))
-    task.getDualstackEnabled.ifPresent(v => builder.setDualstackEnabled(v))
-    task.getForceGlobalBucketAccessEnabled.ifPresent(v => builder.setForceGlobalBucketAccessEnabled(v))
-    task.getPathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
-    task.getPayloadSigningEnabled.ifPresent(v => builder.setPayloadSigningEnabled(v))
-  }
+    def configureAmazonS3ClientBuilder(builder: AmazonS3ClientBuilder): Unit =
+    {
+        task.getAccelerateModeEnabled.ifPresent(v => builder.setAccelerateModeEnabled(v))
+        task.getChunkedEncodingDisabled.ifPresent(v => builder.setChunkedEncodingDisabled(v))
+        task.getDualstackEnabled.ifPresent(v => builder.setDualstackEnabled(v))
+        task.getForceGlobalBucketAccessEnabled.ifPresent(v => builder.setForceGlobalBucketAccessEnabled(v))
+        task.getPathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
+        task.getPayloadSigningEnabled.ifPresent(v => builder.setPayloadSigningEnabled(v))
+    }
 
 }
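Each of the six flags maps one-to-one onto an AmazonS3ClientBuilder setter and is applied only when present, so unset keys keep the SDK defaults. A sketch in the same spirit as the previous one (the stub is hypothetical); `path_style_access_enabled` is the kind of switch typically needed for S3-compatible object stores:

    import java.util.Optional
    import com.amazonaws.services.s3.AmazonS3ClientBuilder
    import org.embulk.output.s3_parquet.aws.AwsS3Configuration

    object S3ConfigurationExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical stub: enable path-style access, leave everything else unset.
        val task = new AwsS3Configuration.Task {
          override def getAccelerateModeEnabled: Optional[Boolean] = Optional.empty()
          override def getChunkedEncodingDisabled: Optional[Boolean] = Optional.empty()
          override def getDualstackEnabled: Optional[Boolean] = Optional.empty()
          override def getForceGlobalBucketAccessEnabled: Optional[Boolean] = Optional.empty()
          override def getPathStyleAccessEnabled: Optional[Boolean] = Optional.of(true)
          override def getPayloadSigningEnabled: Optional[Boolean] = Optional.empty()
        }
        // Only the present value touches the builder; the rest keep SDK defaults.
        val builder = AmazonS3ClientBuilder.standard()
        AwsS3Configuration(task).configureAmazonS3ClientBuilder(builder)
      }
    }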
data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala
@@ -7,50 +7,58 @@ import com.amazonaws.{ClientConfiguration, Protocol}
 import org.embulk.config.{Config, ConfigDefault, ConfigException}
 import org.embulk.output.s3_parquet.aws.HttpProxy.Task
 
-object HttpProxy {
+object HttpProxy
+{
 
-  trait Task {
+    trait Task
+    {
 
-    @Config("host")
-    @ConfigDefault("null")
-    def getHost: Optional[String]
+        @Config("host")
+        @ConfigDefault("null")
+        def getHost: Optional[String]
 
-    @Config("port")
-    @ConfigDefault("null")
-    def getPort: Optional[Int]
+        @Config("port")
+        @ConfigDefault("null")
+        def getPort: Optional[Int]
 
-    @Config("protocol")
-    @ConfigDefault("\"https\"")
-    def getProtocol: String
+        @Config("protocol")
+        @ConfigDefault("\"https\"")
+        def getProtocol: String
 
-    @Config("user")
-    @ConfigDefault("null")
-    def getUser: Optional[String]
+        @Config("user")
+        @ConfigDefault("null")
+        def getUser: Optional[String]
 
-    @Config("password")
-    @ConfigDefault("null")
-    def getPassword: Optional[String]
+        @Config("password")
+        @ConfigDefault("null")
+        def getPassword: Optional[String]
 
-  }
+    }
 
-  def apply(task: Task): HttpProxy = new HttpProxy(task)
+    def apply(task: Task): HttpProxy =
+    {
+        new HttpProxy(task)
+    }
 
 }
 
-class HttpProxy(task: Task) {
+class HttpProxy(task: Task)
+{
 
-  def configureClientConfiguration(cc: ClientConfiguration): Unit = {
-    task.getHost.ifPresent(v => cc.setProxyHost(v))
-    task.getPort.ifPresent(v => cc.setProxyPort(v))
+    def configureClientConfiguration(cc: ClientConfiguration): Unit =
+    {
+        task.getHost.ifPresent(v => cc.setProxyHost(v))
+        task.getPort.ifPresent(v => cc.setProxyPort(v))
 
-    Protocol.values.find(p => p.name().equals(task.getProtocol)) match {
-      case Some(v) =>
-        cc.setProtocol(v)
-      case None =>
-        throw new ConfigException(s"'${task.getProtocol}' is unsupported: `protocol` must be one of [${Protocol.values.map(v => s"'$v'").mkString(", ")}].")
-    }
+        Protocol.values.find(p => p.name().equals(task.getProtocol)) match {
+            case Some(v) =>
+                cc.setProtocol(v)
+            case None =>
+                throw new ConfigException(s"'${task.getProtocol}' is unsupported: `protocol` must be one of [${Protocol.values.map(v => s"'$v'").mkString(", ")}].")
+        }
 
-    task.getUser.ifPresent(v => cc.setProxyUsername(v))
-    task.getPassword.ifPresent(v => cc.setProxyPassword(v))
-  }
+        task.getUser.ifPresent(v => cc.setProxyUsername(v))
+        task.getPassword.ifPresent(v => cc.setProxyPassword(v))
+    }
 }
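Beyond the reformat, configureClientConfiguration validates the `protocol` value against the com.amazonaws.Protocol enum and raises a ConfigException listing the supported values when nothing matches. Note that the comparison uses Protocol.name(), i.e. the enum constant's identifier, and String.equals is case-sensitive. A sketch with a hypothetical stub (HttpProxyExample and the proxy address are illustrative):

    import java.util.Optional
    import com.amazonaws.ClientConfiguration
    import org.embulk.output.s3_parquet.aws.HttpProxy

    object HttpProxyExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical stub describing a proxy at proxy.example.com:8080.
        val task = new HttpProxy.Task {
          override def getHost: Optional[String] = Optional.of("proxy.example.com")
          override def getPort: Optional[Int] = Optional.of(8080)
          override def getProtocol: String = "HTTPS" // must match an enum constant name
          override def getUser: Optional[String] = Optional.empty()
          override def getPassword: Optional[String] = Optional.empty()
        }
        val cc = new ClientConfiguration()
        // Sets proxy host/port, validates the protocol, leaves credentials unset.
        HttpProxy(task).configureClientConfiguration(cc)
      }
    }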
data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala
@@ -6,54 +6,74 @@ import org.apache.parquet.schema.{MessageType, OriginalType, PrimitiveType, Type}
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
 import org.embulk.spi.{Column, ColumnVisitor, Schema}
 
-object EmbulkMessageType {
+object EmbulkMessageType
+{
 
-  def builder(): Builder = Builder()
+    def builder(): Builder =
+    {
+        Builder()
+    }
 
-  case class Builder(name: String = "embulk",
-                     schema: Schema = Schema.builder().build()) {
+    case class Builder(name: String = "embulk",
+                       schema: Schema = Schema.builder().build())
+    {
 
-    def withName(name: String): Builder = Builder(name = name, schema = schema)
+        def withName(name: String): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
 
-    def withSchema(schema: Schema): Builder = Builder(name = name, schema = schema)
+        def withSchema(schema: Schema): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
 
-    def build(): MessageType = {
-      val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
-      schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
-      new MessageType("embulk", builder.build())
+        def build(): MessageType =
+        {
+            val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
+            schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
+            new MessageType("embulk", builder.build())
 
-    }
+        }
 
-  }
+    }
 
-  private case class EmbulkMessageTypeColumnVisitor(builder: ImmutableList.Builder[Type])
-    extends ColumnVisitor {
+    private case class EmbulkMessageTypeColumnVisitor(builder: ImmutableList.Builder[Type])
+        extends ColumnVisitor
+    {
 
-    override def booleanColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName))
-    }
+        override def booleanColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName))
+        }
 
-    override def longColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName))
-    }
+        override def longColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName))
+        }
 
-    override def doubleColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
-    }
+        override def doubleColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
+        }
 
-    override def stringColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
+        override def stringColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
 
-    override def timestampColumn(column: Column): Unit = {
-      // TODO: Support OriginalType.TIME* ?
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
+        override def timestampColumn(column: Column): Unit =
+        {
+            // TODO: Support OriginalType.TIME* ?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
 
-    override def jsonColumn(column: Column): Unit = {
-      // TODO: does this work?
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
-  }
+        override def jsonColumn(column: Column): Unit =
+        {
+            // TODO: does this work?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+    }
 
 }
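The visitor encodes the Embulk-to-Parquet type mapping: boolean to BOOLEAN, long to INT64, double to DOUBLE, and string, timestamp, and json all to BINARY annotated as UTF8, with every field OPTIONAL. Note that build() passes the literal "embulk" to MessageType rather than the name field, so withName has no visible effect as written. A sketch of producing a Parquet schema from a hypothetical three-column Embulk schema (MessageTypeExample is illustrative):

    import org.embulk.spi.Schema
    import org.embulk.spi.`type`.Types
    import org.embulk.output.s3_parquet.parquet.EmbulkMessageType

    object MessageTypeExample {
      def main(args: Array[String]): Unit = {
        val schema = Schema.builder()
          .add("id", Types.LONG)      // -> optional int64 id
          .add("name", Types.STRING)  // -> optional binary name (UTF8)
          .add("price", Types.DOUBLE) // -> optional double price
          .build()
        val messageType = EmbulkMessageType.builder()
          .withSchema(schema)
          .build()
        println(messageType) // Parquet schema, message type named "embulk"
      }
    }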
data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala
@@ -9,25 +9,32 @@ import org.apache.parquet.schema.MessageType
 import org.embulk.spi.{PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 
-private[parquet] case class ParquetFileWriteSupport(schema: Schema,
-                                                    timestampFormatters: Seq[TimestampFormatter])
-  extends WriteSupport[PageReader] {
-
-  private var currentParquetFileWriter: ParquetFileWriter = _
-
-  override def init(configuration: Configuration): WriteContext = {
-    val messageType: MessageType = EmbulkMessageType.builder()
-      .withSchema(schema)
-      .build()
-    val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
-    new WriteContext(messageType, metadata.asJava)
-  }
 
-  override def prepareForWrite(recordConsumer: RecordConsumer): Unit =
-    currentParquetFileWriter = ParquetFileWriter(recordConsumer, schema, timestampFormatters)
-
-  override def write(record: PageReader): Unit =
-    currentParquetFileWriter.write(record)
+private[parquet] case class ParquetFileWriteSupport(schema: Schema,
+                                                    timestampFormatters: Seq[TimestampFormatter])
+    extends WriteSupport[PageReader]
+{
+
+    private var currentParquetFileWriter: ParquetFileWriter = _
+
+    override def init(configuration: Configuration): WriteContext =
+    {
+        val messageType: MessageType = EmbulkMessageType.builder()
+            .withSchema(schema)
+            .build()
+        val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
+        new WriteContext(messageType, metadata.asJava)
+    }
+
+    override def prepareForWrite(recordConsumer: RecordConsumer): Unit =
+    {
+        currentParquetFileWriter = ParquetFileWriter(recordConsumer, schema, timestampFormatters)
+    }
+
+    override def write(record: PageReader): Unit =
+    {
+        currentParquetFileWriter.write(record)
+    }
 }
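The import swap is the Scala 2.13 rename: scala.collection.JavaConverters is deprecated there in favor of scala.jdk.CollectionConverters, with the same .asJava/.asScala extension methods, which suggests this release moved to Scala 2.13. The class otherwise follows parquet-mr's WriteSupport contract: init builds the Parquet MessageType from the Embulk schema once, prepareForWrite captures the RecordConsumer, and write is called once per record. A minimal sketch of the converter change itself:

    // Scala 2.13: same .asJava as before, new import location.
    import scala.jdk.CollectionConverters._

    object ConvertersExample {
      def main(args: Array[String]): Unit = {
        val metadata: Map[String, String] = Map.empty
        val javaMap: java.util.Map[String, String] = metadata.asJava
        println(javaMap.isEmpty) // true
      }
    }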
data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala
@@ -9,117 +9,151 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer}
 import org.embulk.spi.{Column, ColumnVisitor, PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-object ParquetFileWriter {
+object ParquetFileWriter
+{
 
-  case class Builder(path: Path = null,
-                     schema: Schema = null,
-                     timestampFormatters: Seq[TimestampFormatter] = null)
-    extends ParquetWriter.Builder[PageReader, Builder](path) {
+    case class Builder(path: Path = null,
+                       schema: Schema = null,
+                       timestampFormatters: Seq[TimestampFormatter] = null)
+        extends ParquetWriter.Builder[PageReader, Builder](path)
+    {
 
-    def withPath(path: Path): Builder = copy(path = path)
+        def withPath(path: Path): Builder =
+        {
+            copy(path = path)
+        }
 
-    def withPath(pathString: String): Builder = copy(path = new Path(pathString))
+        def withPath(pathString: String): Builder =
+        {
+            copy(path = new Path(pathString))
+        }
 
-    def withSchema(schema: Schema): Builder = copy(schema = schema)
+        def withSchema(schema: Schema): Builder =
+        {
+            copy(schema = schema)
+        }
 
-    def withTimestampFormatters(timestampFormatters: Seq[TimestampFormatter]): Builder = copy(timestampFormatters = timestampFormatters)
+        def withTimestampFormatters(timestampFormatters: Seq[TimestampFormatter]): Builder =
+        {
+            copy(timestampFormatters = timestampFormatters)
+        }
 
-    override def self(): Builder = this
+        override def self(): Builder =
+        {
+            this
+        }
 
-    override def getWriteSupport(conf: Configuration): WriteSupport[PageReader] = {
-      ParquetFileWriteSupport(schema, timestampFormatters)
-    }
-  }
+        override def getWriteSupport(conf: Configuration): WriteSupport[PageReader] =
+        {
+            ParquetFileWriteSupport(schema, timestampFormatters)
+        }
+    }
 
-  def builder(): Builder = Builder()
+    def builder(): Builder =
+    {
+        Builder()
+    }
 
 }
 
 
 private[parquet] case class ParquetFileWriter(recordConsumer: RecordConsumer,
-                                              schema: Schema,
-                                              timestampFormatters: Seq[TimestampFormatter]) {
+                                              schema: Schema,
+                                              timestampFormatters: Seq[TimestampFormatter])
+{
 
-  def write(record: PageReader): Unit = {
-    recordConsumer.startMessage()
-    writeRecord(record)
-    recordConsumer.endMessage()
-  }
+    def write(record: PageReader): Unit =
+    {
+        recordConsumer.startMessage()
+        writeRecord(record)
+        recordConsumer.endMessage()
+    }
 
-  private def writeRecord(record: PageReader): Unit = {
+    private def writeRecord(record: PageReader): Unit =
+    {
 
-    schema.visitColumns(new ColumnVisitor() {
+        schema.visitColumns(new ColumnVisitor()
+        {
 
-      override def booleanColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addBoolean(record.getBoolean(column))
-          })
-        })
-      }
+            override def booleanColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addBoolean(record.getBoolean(column))
+                    })
+                })
+            }
 
-      override def longColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addLong(record.getLong(column))
-          })
-        })
-      }
+            override def longColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addLong(record.getLong(column))
+                    })
+                })
+            }
 
-      override def doubleColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addDouble(record.getDouble(column))
-          })
-        })
-      }
+            override def doubleColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addDouble(record.getDouble(column))
+                    })
+                })
+            }
 
-      override def stringColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            val bin = Binary.fromString(record.getString(column))
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
+            override def stringColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        val bin = Binary.fromString(record.getString(column))
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
 
-      override def timestampColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val t = record.getTimestamp(column)
-            val ft = timestampFormatters(column.getIndex).format(t)
-            val bin = Binary.fromString(ft)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
+            override def timestampColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val t = record.getTimestamp(column)
+                        val ft = timestampFormatters(column.getIndex).format(t)
+                        val bin = Binary.fromString(ft)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
 
-      override def jsonColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val msgPack = record.getJson(column)
-            val bin = Binary.fromString(msgPack.toJson)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
+            override def jsonColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val msgPack = record.getJson(column)
+                        val bin = Binary.fromString(msgPack.toJson)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
 
-      private def nullOr(column: Column,
-                         f: => Unit): Unit = {
-        if (!record.isNull(column)) f
-      }
+            private def nullOr(column: Column,
+                               f: => Unit): Unit =
+            {
+                if (!record.isNull(column)) f
+            }
 
-      private def withWriteFieldContext(column: Column,
-                                        f: => Unit): Unit = {
-        recordConsumer.startField(column.getName, column.getIndex)
-        f
-        recordConsumer.endField(column.getName, column.getIndex)
-      }
+            private def withWriteFieldContext(column: Column,
+                                              f: => Unit): Unit =
+            {
+                recordConsumer.startField(column.getName, column.getIndex)
+                f
+                recordConsumer.endField(column.getName, column.getIndex)
+            }
 
-    })
+        })
 
-  }
+    }
 
 }
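The nullOr guard means a null cell writes nothing at all (the field is never started), which is how Parquet represents null for OPTIONAL fields. The companion's Builder wraps parquet-mr's ParquetWriter.Builder, so building it yields a ParquetWriter[PageReader] that accepts one Embulk record per write call. A usage sketch under stated assumptions (WriterExample is hypothetical; schema, formatters, and reader would come from the Embulk runtime, and the path is assumed to be a local temp file that the plugin then uploads to S3):

    import org.embulk.spi.{PageReader, Schema}
    import org.embulk.spi.time.TimestampFormatter
    import org.embulk.output.s3_parquet.parquet.ParquetFileWriter

    object WriterExample {
      def writePages(schema: Schema, formatters: Seq[TimestampFormatter], reader: PageReader): Unit = {
        val writer = ParquetFileWriter.builder()
          .withPath("/tmp/example.parquet")    // assumed local temp file
          .withSchema(schema)
          .withTimestampFormatters(formatters) // indexed by column, must match the schema order
          .build()
        try {
          while (reader.nextRecord()) {
            writer.write(reader) // one Parquet record per Embulk record
          }
        }
        finally writer.close()
      }
    }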