embulk-output-s3_parquet 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,39 +11,47 @@ import org.embulk.output.s3_parquet.aws.AwsEndpointConfiguration.Task
 
 import scala.util.Try
 
-object AwsEndpointConfiguration {
 
-  trait Task {
+object AwsEndpointConfiguration
+{
 
-    @Config("endpoint")
-    @ConfigDefault("null")
-    def getEndpoint: Optional[String]
+    trait Task
+    {
 
-    @Config("region")
-    @ConfigDefault("null")
-    def getRegion: Optional[String]
+        @Config("endpoint")
+        @ConfigDefault("null")
+        def getEndpoint: Optional[String]
 
-  }
+        @Config("region")
+        @ConfigDefault("null")
+        def getRegion: Optional[String]
 
-  def apply(task: Task): AwsEndpointConfiguration = new AwsEndpointConfiguration(task)
-}
-
-class AwsEndpointConfiguration(task: Task) {
-
-  def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit = {
-    if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
-      val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
-      builder.setEndpointConfiguration(ec)
     }
-    else if (task.getRegion.isPresent && !task.getEndpoint.isPresent) {
-      builder.setRegion(task.getRegion.get)
+
+    def apply(task: Task): AwsEndpointConfiguration =
+    {
+        new AwsEndpointConfiguration(task)
     }
-    else if (!task.getRegion.isPresent && task.getEndpoint.isPresent) {
-      val r: String = Try(new DefaultAwsRegionProviderChain().getRegion).getOrElse(Regions.DEFAULT_REGION.getName)
-      val e: String = task.getEndpoint.get
-      val ec = new EndpointConfiguration(e, r)
-      builder.setEndpointConfiguration(ec)
+}
+
+class AwsEndpointConfiguration(task: Task)
+{
+
+    def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit =
+    {
+        if (task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val ec = new EndpointConfiguration(task.getEndpoint.get, task.getRegion.get)
+            builder.setEndpointConfiguration(ec)
+        }
+        else if (task.getRegion.isPresent && !task.getEndpoint.isPresent) {
+            builder.setRegion(task.getRegion.get)
+        }
+        else if (!task.getRegion.isPresent && task.getEndpoint.isPresent) {
+            val r: String = Try(new DefaultAwsRegionProviderChain().getRegion).getOrElse(Regions.DEFAULT_REGION.getName)
+            val e: String = task.getEndpoint.get
+            val ec = new EndpointConfiguration(e, r)
+            builder.setEndpointConfiguration(ec)
+        }
     }
-  }
 
 }
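
The hunk above is a pure style reformat; the endpoint/region resolution logic is unchanged. For orientation, a minimal sketch (not part of the package; endpoint and region values are hypothetical) of what each branch does to an AWS SDK v1 builder:

```scala
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.services.s3.AmazonS3ClientBuilder

val builder = AmazonS3ClientBuilder.standard()

// endpoint + region: pin both explicitly (e.g. for an S3-compatible store).
builder.setEndpointConfiguration(
    new EndpointConfiguration("https://s3.example.com", "us-east-1"))

// region only:   builder.setRegion("us-east-1")
// endpoint only: the region is resolved via DefaultAwsRegionProviderChain,
//                falling back to Regions.DEFAULT_REGION
```

The if/else-if structure matters here: AWS SDK v1 builders reject having both a region and an endpoint configuration set on the same instance, so exactly one of the two calls is made.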
@@ -7,50 +7,58 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder
 import org.embulk.config.{Config, ConfigDefault}
 import org.embulk.output.s3_parquet.aws.AwsS3Configuration.Task
 
+
 /*
  * These are advanced settings, so write no documentation.
  */
-object AwsS3Configuration {
-  trait Task {
+object AwsS3Configuration
+{
+    trait Task
+    {
 
-    @Config("accelerate_mode_enabled")
-    @ConfigDefault("null")
-    def getAccelerateModeEnabled: Optional[Boolean]
+        @Config("accelerate_mode_enabled")
+        @ConfigDefault("null")
+        def getAccelerateModeEnabled: Optional[Boolean]
 
-    @Config("chunked_encoding_disabled")
-    @ConfigDefault("null")
-    def getChunkedEncodingDisabled: Optional[Boolean]
+        @Config("chunked_encoding_disabled")
+        @ConfigDefault("null")
+        def getChunkedEncodingDisabled: Optional[Boolean]
 
-    @Config("dualstack_enabled")
-    @ConfigDefault("null")
-    def getDualstackEnabled: Optional[Boolean]
+        @Config("dualstack_enabled")
+        @ConfigDefault("null")
+        def getDualstackEnabled: Optional[Boolean]
 
-    @Config("force_global_bucket_access_enabled")
-    @ConfigDefault("null")
-    def getForceGlobalBucketAccessEnabled: Optional[Boolean]
+        @Config("force_global_bucket_access_enabled")
+        @ConfigDefault("null")
+        def getForceGlobalBucketAccessEnabled: Optional[Boolean]
 
-    @Config("path_style_access_enabled")
-    @ConfigDefault("null")
-    def getPathStyleAccessEnabled: Optional[Boolean]
+        @Config("path_style_access_enabled")
+        @ConfigDefault("null")
+        def getPathStyleAccessEnabled: Optional[Boolean]
 
-    @Config("payload_signing_enabled")
-    @ConfigDefault("null")
-    def getPayloadSigningEnabled: Optional[Boolean]
+        @Config("payload_signing_enabled")
+        @ConfigDefault("null")
+        def getPayloadSigningEnabled: Optional[Boolean]
 
-  }
+    }
 
-  def apply(task: Task): AwsS3Configuration = new AwsS3Configuration(task)
+    def apply(task: Task): AwsS3Configuration =
+    {
+        new AwsS3Configuration(task)
+    }
 }
 
-class AwsS3Configuration(task: Task) {
+class AwsS3Configuration(task: Task)
+{
 
-  def configureAmazonS3ClientBuilder(builder: AmazonS3ClientBuilder): Unit = {
-    task.getAccelerateModeEnabled.ifPresent(v => builder.setAccelerateModeEnabled(v))
-    task.getChunkedEncodingDisabled.ifPresent(v => builder.setChunkedEncodingDisabled(v))
-    task.getDualstackEnabled.ifPresent(v => builder.setDualstackEnabled(v))
-    task.getForceGlobalBucketAccessEnabled.ifPresent(v => builder.setForceGlobalBucketAccessEnabled(v))
-    task.getPathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
-    task.getPayloadSigningEnabled.ifPresent(v => builder.setPayloadSigningEnabled(v))
-  }
+    def configureAmazonS3ClientBuilder(builder: AmazonS3ClientBuilder): Unit =
+    {
+        task.getAccelerateModeEnabled.ifPresent(v => builder.setAccelerateModeEnabled(v))
+        task.getChunkedEncodingDisabled.ifPresent(v => builder.setChunkedEncodingDisabled(v))
+        task.getDualstackEnabled.ifPresent(v => builder.setDualstackEnabled(v))
+        task.getForceGlobalBucketAccessEnabled.ifPresent(v => builder.setForceGlobalBucketAccessEnabled(v))
+        task.getPathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
+        task.getPayloadSigningEnabled.ifPresent(v => builder.setPayloadSigningEnabled(v))
+    }
 
 }
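
All six flags default to null, so the ifPresent chain applies only what the user actually set and leaves everything else at the SDK default. A minimal standalone sketch of that pattern (hypothetical value, not part of the diff):

```scala
import java.util.Optional
import com.amazonaws.services.s3.AmazonS3ClientBuilder

val builder = AmazonS3ClientBuilder.standard()

// Hypothetical: path-style access is the flag most often needed for
// S3-compatible stores; an empty Optional would leave the builder untouched.
val pathStyleAccessEnabled: Optional[Boolean] = Optional.of(true)
pathStyleAccessEnabled.ifPresent(v => builder.setPathStyleAccessEnabled(v))
```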
@@ -7,50 +7,58 @@ import com.amazonaws.{ClientConfiguration, Protocol}
 import org.embulk.config.{Config, ConfigDefault, ConfigException}
 import org.embulk.output.s3_parquet.aws.HttpProxy.Task
 
-object HttpProxy {
 
-  trait Task {
+object HttpProxy
+{
 
-    @Config("host")
-    @ConfigDefault("null")
-    def getHost: Optional[String]
+    trait Task
+    {
 
-    @Config("port")
-    @ConfigDefault("null")
-    def getPort: Optional[Int]
+        @Config("host")
+        @ConfigDefault("null")
+        def getHost: Optional[String]
 
-    @Config("protocol")
-    @ConfigDefault("\"https\"")
-    def getProtocol: String
+        @Config("port")
+        @ConfigDefault("null")
+        def getPort: Optional[Int]
 
-    @Config("user")
-    @ConfigDefault("null")
-    def getUser: Optional[String]
+        @Config("protocol")
+        @ConfigDefault("\"https\"")
+        def getProtocol: String
 
-    @Config("password")
-    @ConfigDefault("null")
-    def getPassword: Optional[String]
+        @Config("user")
+        @ConfigDefault("null")
+        def getUser: Optional[String]
 
-  }
+        @Config("password")
+        @ConfigDefault("null")
+        def getPassword: Optional[String]
 
-  def apply(task: Task): HttpProxy = new HttpProxy(task)
+    }
+
+    def apply(task: Task): HttpProxy =
+    {
+        new HttpProxy(task)
+    }
 
 }
 
-class HttpProxy(task: Task) {
+class HttpProxy(task: Task)
+{
 
-  def configureClientConfiguration(cc: ClientConfiguration): Unit = {
-    task.getHost.ifPresent(v => cc.setProxyHost(v))
-    task.getPort.ifPresent(v => cc.setProxyPort(v))
+    def configureClientConfiguration(cc: ClientConfiguration): Unit =
+    {
+        task.getHost.ifPresent(v => cc.setProxyHost(v))
+        task.getPort.ifPresent(v => cc.setProxyPort(v))
 
-    Protocol.values.find(p => p.name().equals(task.getProtocol)) match {
-      case Some(v) =>
-        cc.setProtocol(v)
-      case None =>
-        throw new ConfigException(s"'${task.getProtocol}' is unsupported: `protocol` must be one of [${Protocol.values.map(v => s"'$v'").mkString(", ")}].")
-    }
+        Protocol.values.find(p => p.name().equals(task.getProtocol)) match {
+            case Some(v) =>
+                cc.setProtocol(v)
+            case None =>
+                throw new ConfigException(s"'${task.getProtocol}' is unsupported: `protocol` must be one of [${Protocol.values.map(v => s"'$v'").mkString(", ")}].")
+        }
 
-    task.getUser.ifPresent(v => cc.setProxyUsername(v))
-    task.getPassword.ifPresent(v => cc.setProxyPassword(v))
-  }
+        task.getUser.ifPresent(v => cc.setProxyUsername(v))
+        task.getPassword.ifPresent(v => cc.setProxyPassword(v))
+    }
 }
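
One thing to watch in the (unchanged) validation above: `Protocol` is `com.amazonaws.Protocol`, whose enum constant names are `HTTP` and `HTTPS`, while its `toString` returns the lowercase `"http"`/`"https"` that the error message interpolates and that `@ConfigDefault("\"https\"")` supplies. Comparing against `p.name()` therefore only matches uppercase input. A small standalone check (assuming `aws-java-sdk-core` on the classpath):

```scala
import com.amazonaws.Protocol

object ProtocolCheck {
    def main(args: Array[String]): Unit = {
        val configured = "https" // the plugin's @ConfigDefault value
        // Enum constant names are uppercase ("HTTP"/"HTTPS")...
        println(Protocol.values.exists(_.name().equals(configured)))   // false
        // ...while toString carries the lowercase protocol string.
        println(Protocol.values.exists(_.toString.equals(configured))) // true
    }
}
```

If the lowercase values are meant to be accepted, matching on `toString` (or using `equalsIgnoreCase`) may be the intended comparison; as released, only `HTTP`/`HTTPS` pass validation.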
@@ -6,54 +6,74 @@ import org.apache.parquet.schema.{MessageType, OriginalType, PrimitiveType, Type}
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
 import org.embulk.spi.{Column, ColumnVisitor, Schema}
 
-object EmbulkMessageType {
-
-  def builder(): Builder = Builder()
-
-  case class Builder(name: String = "embulk",
-                     schema: Schema = Schema.builder().build()) {
-
-    def withName(name: String): Builder = Builder(name = name, schema = schema)
-
-    def withSchema(schema: Schema): Builder = Builder(name = name, schema = schema)
-
-    def build(): MessageType = {
-      val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
-      schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
-      new MessageType("embulk", builder.build())
+object EmbulkMessageType
+{
 
+    def builder(): Builder =
+    {
+        Builder()
     }
 
-  }
+    case class Builder(name: String = "embulk",
+                       schema: Schema = Schema.builder().build())
+    {
 
-  private case class EmbulkMessageTypeColumnVisitor(builder: ImmutableList.Builder[Type])
-    extends ColumnVisitor {
+        def withName(name: String): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
 
-    override def booleanColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName))
-    }
+        def withSchema(schema: Schema): Builder =
+        {
+            Builder(name = name, schema = schema)
+        }
 
-    override def longColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName))
-    }
-
-    override def doubleColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
-    }
+        def build(): MessageType =
+        {
+            val builder: ImmutableList.Builder[Type] = ImmutableList.builder[Type]()
+            schema.visitColumns(EmbulkMessageTypeColumnVisitor(builder))
+            new MessageType("embulk", builder.build())
 
-    override def stringColumn(column: Column): Unit = {
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
-    }
+        }
 
-    override def timestampColumn(column: Column): Unit = {
-      // TODO: Support OriginalType.TIME* ?
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
     }
 
-    override def jsonColumn(column: Column): Unit = {
-      // TODO: does this work?
-      builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+    private case class EmbulkMessageTypeColumnVisitor(builder: ImmutableList.Builder[Type])
+        extends ColumnVisitor
+    {
+
+        override def booleanColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, column.getName))
+        }
+
+        override def longColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, column.getName))
+        }
+
+        override def doubleColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, column.getName))
+        }
+
+        override def stringColumn(column: Column): Unit =
+        {
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+
+        override def timestampColumn(column: Column): Unit =
+        {
+            // TODO: Support OriginalType.TIME* ?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
+
+        override def jsonColumn(column: Column): Unit =
+        {
+            // TODO: does this work?
+            builder.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, column.getName, OriginalType.UTF8))
+        }
    }
-  }
 
 }
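
The visitor encodes a fixed type mapping: Embulk boolean → Parquet BOOLEAN, long → INT64, double → DOUBLE, and string, timestamp, and json all → BINARY annotated with UTF8, every field OPTIONAL. Note also that `build()` hardcodes `"embulk"` as the message name, so `withName` currently has no effect on the result. A sketch (hypothetical column names, not from the diff) of the MessageType this produces for a three-column schema:

```scala
import org.apache.parquet.schema.{MessageType, OriginalType, PrimitiveType, Type}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName

// What EmbulkMessageType would emit for (flag: boolean, count: long, note: string).
val expected = new MessageType(
    "embulk",
    new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BOOLEAN, "flag"),
    new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.INT64, "count"),
    new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "note", OriginalType.UTF8)
)
```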
@@ -9,25 +9,32 @@ import org.apache.parquet.schema.MessageType
 import org.embulk.spi.{PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 
-private[parquet] case class ParquetFileWriteSupport(schema: Schema,
-                                                    timestampFormatters: Seq[TimestampFormatter])
-  extends WriteSupport[PageReader] {
-
-  private var currentParquetFileWriter: ParquetFileWriter = _
-
-  override def init(configuration: Configuration): WriteContext = {
-    val messageType: MessageType = EmbulkMessageType.builder()
-      .withSchema(schema)
-      .build()
-    val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
-    new WriteContext(messageType, metadata.asJava)
-  }
 
-  override def prepareForWrite(recordConsumer: RecordConsumer): Unit = {
-    currentParquetFileWriter = ParquetFileWriter(recordConsumer, schema, timestampFormatters)
-  }
-
-  override def write(record: PageReader): Unit = currentParquetFileWriter.write(record)
+private[parquet] case class ParquetFileWriteSupport(schema: Schema,
+                                                    timestampFormatters: Seq[TimestampFormatter])
+    extends WriteSupport[PageReader]
+{
+
+    private var currentParquetFileWriter: ParquetFileWriter = _
+
+    override def init(configuration: Configuration): WriteContext =
+    {
+        val messageType: MessageType = EmbulkMessageType.builder()
+            .withSchema(schema)
+            .build()
+        val metadata: Map[String, String] = Map.empty // NOTE: When is this used?
+        new WriteContext(messageType, metadata.asJava)
+    }
+
+    override def prepareForWrite(recordConsumer: RecordConsumer): Unit =
+    {
+        currentParquetFileWriter = ParquetFileWriter(recordConsumer, schema, timestampFormatters)
+    }
+
+    override def write(record: PageReader): Unit =
+    {
+        currentParquetFileWriter.write(record)
+    }
 }
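
The import swap above is the one non-cosmetic change in this release: `scala.collection.JavaConverters` is deprecated as of Scala 2.13 in favor of `scala.jdk.CollectionConverters`, which exposes the same `.asJava`/`.asScala` extension methods, so `metadata.asJava` behaves identically under both:

```scala
import scala.jdk.CollectionConverters._

// The same conversion init() performs on its (empty) metadata map.
val metadata: java.util.Map[String, String] = Map.empty[String, String].asJava
```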
@@ -9,117 +9,151 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer}
 import org.embulk.spi.{Column, ColumnVisitor, PageReader, Schema}
 import org.embulk.spi.time.TimestampFormatter
 
-object ParquetFileWriter {
 
-  case class Builder(path: Path = null,
-                     schema: Schema = null,
-                     timestampFormatters: Seq[TimestampFormatter] = null)
-    extends ParquetWriter.Builder[PageReader, Builder](path) {
+object ParquetFileWriter
+{
 
-    def withPath(path: Path): Builder = copy(path = path)
+    case class Builder(path: Path = null,
+                       schema: Schema = null,
+                       timestampFormatters: Seq[TimestampFormatter] = null)
+        extends ParquetWriter.Builder[PageReader, Builder](path)
+    {
 
-    def withPath(pathString: String): Builder = copy(path = new Path(pathString))
+        def withPath(path: Path): Builder =
+        {
+            copy(path = path)
+        }
 
-    def withSchema(schema: Schema): Builder = copy(schema = schema)
+        def withPath(pathString: String): Builder =
+        {
+            copy(path = new Path(pathString))
+        }
 
-    def withTimestampFormatters(timestampFormatters: Seq[TimestampFormatter]): Builder = copy(timestampFormatters = timestampFormatters)
+        def withSchema(schema: Schema): Builder =
+        {
+            copy(schema = schema)
+        }
 
-    override def self(): Builder = this
+        def withTimestampFormatters(timestampFormatters: Seq[TimestampFormatter]): Builder =
+        {
+            copy(timestampFormatters = timestampFormatters)
+        }
 
-    override def getWriteSupport(conf: Configuration): WriteSupport[PageReader] = {
-      ParquetFileWriteSupport(schema, timestampFormatters)
+        override def self(): Builder =
+        {
+            this
+        }
+
+        override def getWriteSupport(conf: Configuration): WriteSupport[PageReader] =
+        {
+            ParquetFileWriteSupport(schema, timestampFormatters)
+        }
     }
-  }
 
-  def builder(): Builder = Builder()
+    def builder(): Builder =
+    {
+        Builder()
+    }
 
 }
 
 
 private[parquet] case class ParquetFileWriter(recordConsumer: RecordConsumer,
-                                              schema: Schema,
-                                              timestampFormatters: Seq[TimestampFormatter]) {
-
-  def write(record: PageReader): Unit = {
-    recordConsumer.startMessage()
-    writeRecord(record)
-    recordConsumer.endMessage()
-  }
-
-  private def writeRecord(record: PageReader): Unit = {
-
-    schema.visitColumns(new ColumnVisitor() {
-
-      override def booleanColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addBoolean(record.getBoolean(column))
-          })
-        })
-      }
-
-      override def longColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addLong(record.getLong(column))
-          })
-        })
-      }
+                                              schema: Schema,
+                                              timestampFormatters: Seq[TimestampFormatter])
+{
+
+    def write(record: PageReader): Unit =
+    {
+        recordConsumer.startMessage()
+        writeRecord(record)
+        recordConsumer.endMessage()
+    }
 
-      override def doubleColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            recordConsumer.addDouble(record.getDouble(column))
-          })
-        })
-      }
+    private def writeRecord(record: PageReader): Unit =
+    {
+
+        schema.visitColumns(new ColumnVisitor()
+        {
+
+            override def booleanColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addBoolean(record.getBoolean(column))
+                    })
+                })
+            }
+
+            override def longColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addLong(record.getLong(column))
+                    })
+                })
+            }
+
+            override def doubleColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        recordConsumer.addDouble(record.getDouble(column))
+                    })
+                })
+            }
+
+            override def stringColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        val bin = Binary.fromString(record.getString(column))
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            override def timestampColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val t = record.getTimestamp(column)
+                        val ft = timestampFormatters(column.getIndex).format(t)
+                        val bin = Binary.fromString(ft)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            override def jsonColumn(column: Column): Unit =
+            {
+                nullOr(column, {
+                    withWriteFieldContext(column, {
+                        // TODO: is a correct way to convert for parquet ?
+                        val msgPack = record.getJson(column)
+                        val bin = Binary.fromString(msgPack.toJson)
+                        recordConsumer.addBinary(bin)
+                    })
+                })
+            }
+
+            private def nullOr(column: Column,
+                               f: => Unit): Unit =
+            {
+                if (!record.isNull(column)) f
+            }
+
+            private def withWriteFieldContext(column: Column,
+                                              f: => Unit): Unit =
+            {
+                recordConsumer.startField(column.getName, column.getIndex)
+                f
+                recordConsumer.endField(column.getName, column.getIndex)
+            }
 
-      override def stringColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            val bin = Binary.fromString(record.getString(column))
-            recordConsumer.addBinary(bin)
-          })
         })
-      }
 
-      override def timestampColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val t = record.getTimestamp(column)
-            val ft = timestampFormatters(column.getIndex).format(t)
-            val bin = Binary.fromString(ft)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
-
-      override def jsonColumn(column: Column): Unit = {
-        nullOr(column, {
-          withWriteFieldContext(column, {
-            // TODO: is a correct way to convert for parquet ?
-            val msgPack = record.getJson(column)
-            val bin = Binary.fromString(msgPack.toJson)
-            recordConsumer.addBinary(bin)
-          })
-        })
-      }
-
-      private def nullOr(column: Column,
-                         f: => Unit): Unit = {
-        if (!record.isNull(column)) f
-      }
-
-      private def withWriteFieldContext(column: Column,
-                                        f: => Unit): Unit = {
-        recordConsumer.startField(column.getName, column.getIndex)
-        f
-        recordConsumer.endField(column.getName, column.getIndex)
-      }
-
-    })
-
-  }
+    }
 
 }
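
For orientation, a hypothetical wiring sketch (not part of the package): the Builder's `build()` is inherited from `ParquetWriter.Builder` and yields a `ParquetWriter[PageReader]` backed by the write support above. `schema` and `formatters` would come from the Embulk transaction, and the code would live inside the plugin's parquet package, since `ParquetFileWriteSupport` is package-private:

```scala
import org.apache.parquet.hadoop.ParquetWriter
import org.embulk.spi.{PageReader, Schema}
import org.embulk.spi.time.TimestampFormatter

// Hypothetical helper: opens a Parquet writer for a local path.
def openWriter(schema: Schema,
               formatters: Seq[TimestampFormatter]): ParquetWriter[PageReader] =
    ParquetFileWriter.builder()
        .withPath("/tmp/example.parquet") // hypothetical output path
        .withSchema(schema)
        .withTimestampFormatters(formatters)
        .build()
```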