embulk-output-orc 0.3.0 → 0.3.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 5bf0784f61bbc808d36ebce5e46aaab889b891a3
-   data.tar.gz: 8937c475721a4f2c347575580982ce2a772f8d63
+   metadata.gz: 6ecb39bb650455937f641f073e9e0b13338f268b
+   data.tar.gz: 393ef796dfdf47239a11186b33988466432b0d02
  SHA512:
-   metadata.gz: 23a1a87ca07df8ebc6d17575a3abcf58cf9c7eb5cd6569ba62cfd7fa3cb52c42cc27a00e65f99d821c396e897ebaac78c3dc5dfe9ee6a750049c2017f08d9fa5
-   data.tar.gz: 0c08613e8c5182987a4bbb03ae3a0ce9eddb474a1b8672aa5fbc25e69da4ea0784a9982c6bc3888263eddb68213ec885eb3bb2000aaf187cb94148fa593a780d
+   metadata.gz: ee733e3cca10bfff236c7ff24d3249a1f8a30629ba314a9b840a2de4d6b552412fa1a6cb9555979dbad499259152b5a24439586105ce0417f629079b26775e9b
+   data.tar.gz: 41f8059f0af1f7eb1accccb18e33c111f7e75b7a69ffba97b26cd74de7349922ebdd852b87ea31f40c257cc57543b88cada9e32aea828f7f8d852adf20d3f328
@@ -0,0 +1,25 @@
+ name: Java CI
+
+ on: [push]
+
+ jobs:
+   build:
+
+     runs-on: ubuntu-latest
+
+     steps:
+     - uses: actions/checkout@v1
+     - name: Set up JDK 1.8
+       uses: actions/setup-java@v1
+       with:
+         java-version: 1.8
+     - name: Build with Gradle
+       run:
+         ./gradlew build
+     - name: Checkstyle & static check
+       run: |
+         ./gradlew --info checkstyle
+         ./gradlew --info check
+     # - name: Spotbugs
+     #   run: |
+     #     ./gradlew spotbugsMain spotbugsTest
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # Orc output plugin for Embulk

- [![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
+ [![Build Status](https://github.com/yuokada/embulk-output-orc/workflows/Java%20CI/badge.svg)](https://github.com/yuokada/embulk-output-orc/actions)
  [![Gem Version](https://badge.fury.io/rb/embulk-output-orc.svg)](https://badge.fury.io/rb/embulk-output-orc)

  ## Overview
@@ -13,15 +13,16 @@
  ## Configuration

  - **path_prefix**: A prefix of output path. (string, required)
-   - support: `file`, `s3n` and `s3a`.
+   - support: `file`, `s3`, `s3n` and `s3a`.
  - **file_ext**: An extension of output file. (string, default: `.orc`)
  - **sequence_format**: (string, default: `.%03d`)
- - **buffer_size**: Set the ORC buffer size (integer, default: `262144`)
- - **strip_size**: Set the ORC strip size (integer, default: `67108864`)
- - **block_size**: Set the ORC block size (integer, default: `268435456`)
+ - **buffer_size**: Set the ORC buffer size (integer, default: `262144` (256 KB))
+ - **strip_size**: Set the ORC strip size (integer, default: `67108864` (64 MB))
+ - **block_size**: Set the ORC block size (integer, default: `268435456` (256 MB))
  - **compression_kind**: ORC compression kind (string, default: `'ZLIB'`)
-   - `NONE`, `ZLIB`, `SNAPPY`
- - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
+   - `NONE`, `ZLIB`, `SNAPPY`, `LZO`, `LZ4`
+ - **overwrite**: Overwrite if output files already exist. (boolean, default: `false`)
+   - Support: `LocalFileSystem`, S3 (`s3`, `s3a`, `s3n`)
  - **default_from_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options (DateTimeZone, default: `UTC`)

  - **auth_method**: name of mechanism to authenticate requests (basic, env, instance, profile, properties, anonymous, or session. default: basic)
@@ -36,14 +37,34 @@
  out:
    type: orc
    path_prefix: "/tmp/output"
-   buffer_size: 8000
-   strip_size: 90000
    compression_kind: ZLIB
    overwrite: true
  ```

  ## ChangeLog

+ ### ver 0.3.4
+
+ - Bump `orc` library to `1.5.4`
+ - bugfix
+   - https://github.com/yuokada/embulk-output-orc/pull/17
+
+ ### ver 0.3.3
+
+ - bugfix
+ - Bump `orc` library to `1.4.4`
+
+ ### ver 0.3.2
+
+ - Update `orc` libraries to `1.4.3`
+
+ ### ver 0.3.0
+
+ - Change default values: (block_size, buffer_size, strip_size)
+
+   - The defaults now match Hive's default values
+     (see: https://orc.apache.org/docs/hive-config.html)
+
  ### ver 0.2.0

  - support: output to s3
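For reference, here is a minimal sketch of an Embulk `out` section exercising the options documented in the README diff above (S3 path scheme, the expanded `compression_kind` values, `overwrite`, and `auth_method`). The bucket name and key prefix are hypothetical placeholders, not values taken from this release:

```yaml
out:
  type: orc
  # any of file, s3, s3n or s3a prefixes should work per the README above;
  # the bucket and prefix here are illustrative only
  path_prefix: "s3a://example-bucket/embulk/output"
  file_ext: .orc
  compression_kind: LZ4      # NONE, ZLIB, SNAPPY, LZO or LZ4
  overwrite: true            # supported for LocalFileSystem and S3
  auth_method: env           # credentials read from environment variables
  default_from_timezone: UTC
```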
@@ -1,8 +1,10 @@
  plugins {
      id "com.jfrog.bintray" version "1.1"
-     id "com.github.jruby-gradle.base" version "0.1.5"
+     id "com.github.jruby-gradle.base" version "1.5.0"
      id "java"
+     id "scala"
      id "checkstyle"
+     // id "com.github.spotbugs" version "3.0.1"
      id "org.sonarqube" version "2.5"
  }
  import com.github.jrubygradle.JRubyExec
@@ -18,26 +20,41 @@ configurations {
      runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
  }

- version = "0.3.0"
+ version = "0.3.5"

  sourceCompatibility = 1.8
  targetCompatibility = 1.8

  dependencies {
-     compile "org.embulk:embulk-core:0.8.34"
-     provided "org.embulk:embulk-core:0.8.34"
+     compile "org.embulk:embulk-core:0.9.23"
+     provided "org.embulk:embulk-core:0.9.23"
+     compile "org.scala-lang:scala-library:2.12.+"

-     compile "org.apache.orc:orc:1.4.0"
-     compile "org.apache.orc:orc-core:1.4.0"
-     compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
+     compile "org.apache.orc:orc:1.5.4"
+     compile "org.apache.orc:orc-core:1.5.4"
+     compile "org.apache.hadoop:hadoop-hdfs:2.7.5"

      compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
      compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
-     compile "org.apache.hadoop:hadoop-aws:2.7.3"
+     compile "org.apache.hadoop:hadoop-aws:2.7.5"

-     testCompile "junit:junit:4.+"
-     testCompile "org.embulk:embulk-core:0.8.34:tests"
-     testCompile "org.embulk:embulk-standards:0.8.34"
+     testCompile 'org.jmockit:jmockit:1.38'
+     // testCompile "junit:junit:4.+"
+     testCompile 'org.hamcrest:hamcrest-core:1.3'
+     testCompile 'org.testng:testng:6.14.2'
+     testCompile "org.embulk:embulk-core:0.8.39:tests"
+     testCompile "org.embulk:embulk-standards:0.8.39"
+ }
+
+ sourceSets {
+     main {
+         scala {
+             srcDirs = ['src/main/scala', 'src/main/java']
+         }
+         java {
+             srcDirs = []
+         }
+     }
  }

  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -65,14 +82,16 @@ task checkstyle(type: Checkstyle) {
  }

  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
-     jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
-     script "${project.name}.gemspec"
+     jrubyArgs "-S"
+     script "gem"
+     scriptArgs "build", "${project.name}.gemspec"
      doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
  }

  task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
-     jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
-     script "pkg/${project.name}-${project.version}.gem"
+     jrubyArgs "-S"
+     script "gem"
+     scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
  }

  task "package"(dependsOn: ["gemspec", "classpath"]) {
@@ -1,7 +1,7 @@
  ---
  in:
    type: randomj
-   rows: 1024
+   rows: 1024000
    threads: 1
    # default_timezone: Asia/Tokyo
    primary_key: myid
@@ -14,14 +14,12 @@ in:
    - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
    - {name: purchase, type: timestamp, format: '%Y/%m/%d'}

- #exec:
- #  max_threads: 6 # run at most 8 tasks concurrently
- #  min_output_tasks: 2 # disable page scattering
+ exec:
+   max_threads: 2 # run at most 2 tasks concurrently
+   min_output_tasks: 1 # disable page scattering

  out:
    type: orc
    overwrite: true
    path_prefix: "/tmp/output"
-   buffer_size: 8000
-   strip_size: 90000
    compression_kind: ZLIB
@@ -1,5 +1,5 @@
  distributionBase=GRADLE_USER_HOME
  distributionPath=wrapper/dists
+ distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.4-bin.zip
  zipStoreBase=GRADLE_USER_HOME
  zipStorePath=wrapper/dists
- distributionUrl=https\://services.gradle.org/distributions/gradle-4.2.1-bin.zip
data/gradlew CHANGED
@@ -1,5 +1,21 @@
  #!/usr/bin/env sh

+ #
+ # Copyright 2015 the original author or authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
  ##############################################################################
  ##
  ##  Gradle start up script for UN*X
@@ -28,7 +44,7 @@ APP_NAME="Gradle"
  APP_BASE_NAME=`basename "$0"`

  # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
- DEFAULT_JVM_OPTS=""
+ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

  # Use the maximum available, or set MAX_FD != -1 to use that value.
  MAX_FD="maximum"
@@ -1,3 +1,19 @@
+ @rem
+ @rem Copyright 2015 the original author or authors.
+ @rem
+ @rem Licensed under the Apache License, Version 2.0 (the "License");
+ @rem you may not use this file except in compliance with the License.
+ @rem You may obtain a copy of the License at
+ @rem
+ @rem      https://www.apache.org/licenses/LICENSE-2.0
+ @rem
+ @rem Unless required by applicable law or agreed to in writing, software
+ @rem distributed under the License is distributed on an "AS IS" BASIS,
+ @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ @rem See the License for the specific language governing permissions and
+ @rem limitations under the License.
+ @rem
+
  @if "%DEBUG%" == "" @echo off
  @rem ##########################################################################
  @rem
@@ -14,7 +30,7 @@ set APP_BASE_NAME=%~n0
  set APP_HOME=%DIRNAME%

  @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
- set DEFAULT_JVM_OPTS=
+ set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

  @rem Find java.exe
  if defined JAVA_HOME goto findJavaFromJavaHome
@@ -0,0 +1,42 @@
+ package org.embulk.output.orc
+
+ import java.nio.charset.StandardCharsets
+
+ import org.apache.hadoop.hive.ql.exec.vector._
+ import org.embulk.spi.{Column, ColumnVisitor, PageReader}
+
+ class OrcColumnVisitor(val reader: PageReader, val batch: VectorizedRowBatch, val i: Integer) extends ColumnVisitor {
+   override def booleanColumn(column: Column): Unit = if (reader.isNull(column)) {
+     batch.cols(column.getIndex).noNulls = false
+     batch.cols(column.getIndex).isNull(i) = true
+   }
+   else if (reader.getBoolean(column)) batch.cols(column.getIndex).asInstanceOf[LongColumnVector].vector(i) = 1
+   else batch.cols(column.getIndex).asInstanceOf[LongColumnVector].vector(i) = 0
+
+   override def longColumn(column: Column): Unit = if (reader.isNull(column)) {
+     batch.cols(column.getIndex).noNulls = false
+     batch.cols(column.getIndex).isNull(i) = true
+   }
+   else batch.cols(column.getIndex).asInstanceOf[LongColumnVector].vector(i) = reader.getLong(column)
+
+   override def doubleColumn(column: Column): Unit = if (reader.isNull(column)) {
+     batch.cols(column.getIndex).noNulls = false
+     batch.cols(column.getIndex).isNull(i) = true
+   }
+   else batch.cols(column.getIndex).asInstanceOf[DoubleColumnVector].vector(i) = reader.getDouble(column)
+
+   override def stringColumn(column: Column): Unit = if (!reader.isNull(column)) batch.cols(column.getIndex).asInstanceOf[BytesColumnVector].setVal(i, reader.getString(column).getBytes(StandardCharsets.UTF_8))
+   else {
+     batch.cols(column.getIndex).noNulls = false
+     batch.cols(column.getIndex).isNull(i) = true
+   }
+
+   override def timestampColumn(column: Column): Unit = if (reader.isNull(column)) batch.cols(column.getIndex).asInstanceOf[TimestampColumnVector].setNullValue(i)
+   else {
+     val timestamp = reader.getTimestamp(column)
+     val ts = new java.sql.Timestamp(timestamp.getEpochSecond * 1000)
+     batch.cols(column.getIndex).asInstanceOf[TimestampColumnVector].set(i, ts)
+   }
+
+   override def jsonColumn(column: Column) = throw new UnsupportedOperationException("orc output plugin does not support json type")
+ }
@@ -0,0 +1,156 @@
+ package org.embulk.output.orc
+
+ import java.io.IOException
+ import java.util
+
+ import org.apache.hadoop.conf.Configuration
+ import org.apache.hadoop.fs.{LocalFileSystem, Path}
+ import org.apache.hadoop.hdfs.DistributedFileSystem
+ import org.apache.hadoop.util.VersionInfo
+ import org.apache.orc.{CompressionKind, MemoryManager, OrcFile, TypeDescription, Writer}
+ import org.embulk.config.{ConfigSource, TaskReport, TaskSource}
+ import org.embulk.spi.util.Timestamps
+ import org.embulk.spi.{Exec, OutputPlugin, PageReader, Schema}
+ import org.embulk.util.aws.credentials.AwsCredentials
+
+ object OrcOutputPlugin {
+   private[orc] def getSchema(schema: Schema) = {
+     val oschema = TypeDescription.createStruct
+     for (i <- 0 until schema.size) {
+       val column = schema.getColumn(i)
+       val `type` = column.getType
+       `type`.getName match {
+         case "long" =>
+           oschema.addField(column.getName, TypeDescription.createLong)
+         case "double" =>
+           oschema.addField(column.getName, TypeDescription.createDouble)
+         case "boolean" =>
+           oschema.addField(column.getName, TypeDescription.createBoolean)
+         case "string" =>
+           oschema.addField(column.getName, TypeDescription.createString)
+         case "timestamp" =>
+           oschema.addField(column.getName, TypeDescription.createTimestamp)
+         case _ =>
+           System.out.println("Unsupported type")
+       }
+     }
+     oschema
+   }
+
+   // We avoid using orc.MemoryManagerImpl since it is not threadsafe, but embulk is multi-threaded.
+   // Embulk creates and uses multiple instances of TransactionalPageOutput in worker threads.
+   // As a workaround, WriterLocalMemoryManager is bound to a single orc.Writer instance, and
+   // notifies checkMemory() only to that instance.
+   private class WriterLocalMemoryManager extends MemoryManager {
+     final private[orc] val rowsBetweenChecks = 10000
+     private var rowsAddedSinceCheck = 0
+     private[orc] var boundCallback: MemoryManager.Callback = _
+
+     @throws[IOException]
+     override def addWriter(path: Path, requestedAllocation: Long, callback: MemoryManager.Callback): Unit = {
+       if (boundCallback != null) {
+         throw new IllegalStateException("WriterLocalMemoryManager should be bound to a single orc.Writer instance.")
+       } else {
+         boundCallback = callback
+       }
+     }
+
+     @throws[IOException]
+     override def removeWriter(path: Path): Unit = boundCallback = null
+
+     @throws[IOException]
+     override def addedRow(rows: Int): Unit = {
+       rowsAddedSinceCheck += rows
+       if (rowsAddedSinceCheck > rowsBetweenChecks) {
+         boundCallback.checkMemory(1)
+         rowsAddedSinceCheck = 0
+       }
+     }
+   }
+
+ }
+
+ class OrcOutputPlugin extends OutputPlugin {
+   override def transaction(config: ConfigSource, schema: Schema, taskCount: Int, control: OutputPlugin.Control) = {
+     val task = config.loadConfig(classOf[PluginTask])
+     // retryable (idempotent) output:
+     // return resume(task.dump(), schema, taskCount, control);
+     // non-retryable (non-idempotent) output:
+     control.run(task.dump)
+     Exec.newConfigDiff
+   }
+
+   override def resume(taskSource: TaskSource, schema: Schema, taskCount: Int, control: OutputPlugin.Control) = throw new UnsupportedOperationException("orc output plugin does not support resuming")
+
+   override def cleanup(taskSource: TaskSource, schema: Schema, taskCount: Int, successTaskReports: util.List[TaskReport]): Unit = {
+   }
+
+   override def open(taskSource: TaskSource, schema: Schema, taskIndex: Int) = {
+     val task = taskSource.loadTask(classOf[PluginTask])
+     if (task.getOverwrite) {
+       val credentials = AwsCredentials.getAWSCredentialsProvider(task).getCredentials
+       OrcOutputPluginHelper.removeOldFile(buildPath(task, taskIndex), task)
+     }
+     val reader = new PageReader(schema)
+     val writer = createWriter(task, schema, taskIndex)
+     new OrcTransactionalPageOutput(reader, writer, task)
+   }
+
+   private def buildPath(task: PluginTask, processorIndex: Int): String = {
+     val pathPrefix = task.getPathPrefix
+     val pathSuffix = task.getFileNameExtension
+     val sequenceFormat = task.getSequenceFormat
+     val fmt = java.lang.String.format(sequenceFormat, processorIndex.asInstanceOf[AnyRef])
+     pathPrefix + fmt + pathSuffix
+   }
+
+   private def getHadoopConfiguration(task: PluginTask) = {
+     val conf = new Configuration
+     // see: https://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
+     conf.set("fs.hdfs.impl", classOf[DistributedFileSystem].getName)
+     conf.set("fs.file.impl", classOf[LocalFileSystem].getName)
+     // see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
+     AwsCredentials.getAWSCredentialsProvider(task)
+     if (task.getAccessKeyId.isPresent) {
+       conf.set("fs.s3a.access.key", task.getAccessKeyId.get)
+       conf.set("fs.s3n.awsAccessKeyId", task.getAccessKeyId.get)
+     }
+     if (task.getSecretAccessKey.isPresent) {
+       conf.set("fs.s3a.secret.key", task.getSecretAccessKey.get)
+       conf.set("fs.s3n.awsSecretAccessKey", task.getSecretAccessKey.get)
+     }
+     if (task.getEndpoint.isPresent) {
+       conf.set("fs.s3a.endpoint", task.getEndpoint.get)
+       conf.set("fs.s3n.endpoint", task.getEndpoint.get)
+     }
+     conf
+   }
+
+   private def createWriter(task: PluginTask, schema: Schema, processorIndex: Int): Writer = {
+     val timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions)
+     val conf = getHadoopConfiguration(task)
+     val oschema = OrcOutputPlugin.getSchema(schema)
+     // see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
+     Thread.currentThread.setContextClassLoader(classOf[VersionInfo].getClassLoader)
+
+     var writer: Writer = null
+     try { // Make writerOptions
+       val writerOptions = createWriterOptions(task, conf)
+       // see: https://stackoverflow.com/questions/9256733/how-to-connect-hive-in-ireport
+       // see: https://community.hortonworks.com/content/kbentry/73458/connecting-dbvisualizer-and-datagrip-to-hive-with.html
+       writer = OrcFile.createWriter(new Path(buildPath(task, processorIndex)), writerOptions.setSchema(oschema).memory(new OrcOutputPlugin.WriterLocalMemoryManager).version(OrcFile.Version.V_0_12))
+     } catch {
+       case e: IOException => throw e
+     }
+     writer
+   }
+
+   private def createWriterOptions(task: PluginTask, conf: Configuration) = {
+     val bufferSize = task.getBufferSize
+     val stripSize = task.getStripSize
+     val blockSize = task.getBlockSize
+     val kindString = task.getCompressionKind
+     val kind = CompressionKind.valueOf(kindString)
+     OrcFile.writerOptions(conf).bufferSize(bufferSize).blockSize(blockSize.toLong).stripeSize(stripSize.toLong).compress(kind)
+   }
+ }