embulk-output-orc 0.0.1

data/gradle/wrapper/gradle-wrapper.properties ADDED
@@ -0,0 +1,6 @@
+ #Mon Aug 14 21:51:29 JST 2017
+ distributionBase=GRADLE_USER_HOME
+ distributionPath=wrapper/dists
+ zipStoreBase=GRADLE_USER_HOME
+ zipStorePath=wrapper/dists
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
data/gradlew ADDED
@@ -0,0 +1,169 @@
+ #!/usr/bin/env bash
+
+ ##############################################################################
+ ##
+ ## Gradle start up script for UN*X
+ ##
+ ##############################################################################
+
+ # Attempt to set APP_HOME
+ # Resolve links: $0 may be a link
+ PRG="$0"
+ # Need this for relative symlinks.
+ while [ -h "$PRG" ] ; do
+     ls=`ls -ld "$PRG"`
+     link=`expr "$ls" : '.*-> \(.*\)$'`
+     if expr "$link" : '/.*' > /dev/null; then
+         PRG="$link"
+     else
+         PRG=`dirname "$PRG"`"/$link"
+     fi
+ done
+ SAVED="`pwd`"
+ cd "`dirname \"$PRG\"`/" >/dev/null
+ APP_HOME="`pwd -P`"
+ cd "$SAVED" >/dev/null
+
+ APP_NAME="Gradle"
+ APP_BASE_NAME=`basename "$0"`
+
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+ DEFAULT_JVM_OPTS=""
+
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
+ MAX_FD="maximum"
+
+ warn ( ) {
+     echo "$*"
+ }
+
+ die ( ) {
+     echo
+     echo "$*"
+     echo
+     exit 1
+ }
+
+ # OS specific support (must be 'true' or 'false').
+ cygwin=false
+ msys=false
+ darwin=false
+ nonstop=false
+ case "`uname`" in
+     CYGWIN* )
+         cygwin=true
+         ;;
+     Darwin* )
+         darwin=true
+         ;;
+     MINGW* )
+         msys=true
+         ;;
+     NONSTOP* )
+         nonstop=true
+         ;;
+ esac
+
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+ # Determine the Java command to use to start the JVM.
+ if [ -n "$JAVA_HOME" ] ; then
+     if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+         # IBM's JDK on AIX uses strange locations for the executables
+         JAVACMD="$JAVA_HOME/jre/sh/java"
+     else
+         JAVACMD="$JAVA_HOME/bin/java"
+     fi
+     if [ ! -x "$JAVACMD" ] ; then
+         die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+ Please set the JAVA_HOME variable in your environment to match the
+ location of your Java installation."
+     fi
+ else
+     JAVACMD="java"
+     which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+ Please set the JAVA_HOME variable in your environment to match the
+ location of your Java installation."
+ fi
+
+ # Increase the maximum file descriptors if we can.
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+     MAX_FD_LIMIT=`ulimit -H -n`
+     if [ $? -eq 0 ] ; then
+         if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+             MAX_FD="$MAX_FD_LIMIT"
+         fi
+         ulimit -n $MAX_FD
+         if [ $? -ne 0 ] ; then
+             warn "Could not set maximum file descriptor limit: $MAX_FD"
+         fi
+     else
+         warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+     fi
+ fi
+
+ # For Darwin, add options to specify how the application appears in the dock
+ if $darwin; then
+     GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+ fi
+
+ # For Cygwin, switch paths to Windows format before running java
+ if $cygwin ; then
+     APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+     CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+     JAVACMD=`cygpath --unix "$JAVACMD"`
+
+     # We build the pattern for arguments to be converted via cygpath
+     ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+     SEP=""
+     for dir in $ROOTDIRSRAW ; do
+         ROOTDIRS="$ROOTDIRS$SEP$dir"
+         SEP="|"
+     done
+     OURCYGPATTERN="(^($ROOTDIRS))"
+     # Add a user-defined pattern to the cygpath arguments
+     if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+         OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+     fi
+     # Now convert the arguments - kludge to limit ourselves to /bin/sh
+     i=0
+     for arg in "$@" ; do
+         CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+         CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+         if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+             eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+         else
+             eval `echo args$i`="\"$arg\""
+         fi
+         i=$((i+1))
+     done
+     case $i in
+         (0) set -- ;;
+         (1) set -- "$args0" ;;
+         (2) set -- "$args0" "$args1" ;;
+         (3) set -- "$args0" "$args1" "$args2" ;;
+         (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+         (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+         (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+         (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+         (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+         (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+     esac
+ fi
+
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+ function splitJvmOpts() {
+     JVM_OPTS=("$@")
+ }
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
+     cd "$(dirname "$0")"
+ fi
+
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,84 @@
+ @if "%DEBUG%" == "" @echo off
+ @rem ##########################################################################
+ @rem
+ @rem Gradle startup script for Windows
+ @rem
+ @rem ##########################################################################
+
+ @rem Set local scope for the variables with windows NT shell
+ if "%OS%"=="Windows_NT" setlocal
+
+ set DIRNAME=%~dp0
+ if "%DIRNAME%" == "" set DIRNAME=.
+ set APP_BASE_NAME=%~n0
+ set APP_HOME=%DIRNAME%
+
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+ set DEFAULT_JVM_OPTS=
+
+ @rem Find java.exe
+ if defined JAVA_HOME goto findJavaFromJavaHome
+
+ set JAVA_EXE=java.exe
+ %JAVA_EXE% -version >NUL 2>&1
+ if "%ERRORLEVEL%" == "0" goto init
+
+ echo.
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+ echo.
+ echo Please set the JAVA_HOME variable in your environment to match the
+ echo location of your Java installation.
+
+ goto fail
+
+ :findJavaFromJavaHome
+ set JAVA_HOME=%JAVA_HOME:"=%
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+ if exist "%JAVA_EXE%" goto init
+
+ echo.
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+ echo.
+ echo Please set the JAVA_HOME variable in your environment to match the
+ echo location of your Java installation.
+
+ goto fail
+
+ :init
+ @rem Get command-line arguments, handling Windows variants
+
+ if not "%OS%" == "Windows_NT" goto win9xME_args
+
+ :win9xME_args
+ @rem Slurp the command line arguments.
+ set CMD_LINE_ARGS=
+ set _SKIP=2
+
+ :win9xME_args_slurp
+ if "x%~1" == "x" goto execute
+
+ set CMD_LINE_ARGS=%*
+
+ :execute
+ @rem Setup the command line
+
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+ @rem Execute Gradle
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+ :end
+ @rem End local scope for the variables with windows NT shell
+ if "%ERRORLEVEL%"=="0" goto mainEnd
+
+ :fail
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+ rem the _cmd.exe /c_ return code!
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+ exit /b 1
+
+ :mainEnd
+ if "%OS%"=="Windows_NT" endlocal
+
+ :omega
data/lib/embulk/output/orc.rb ADDED
@@ -0,0 +1,3 @@
+ Embulk::JavaPlugin.register_output(
+   "orc", "org.embulk.output.orc.OrcOutputPlugin",
+   File.expand_path('../../../../classpath', __FILE__))
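
The registration above exposes the Java class as Embulk's orc output type. As a usage sketch, a minimal Embulk configuration might look like the following; the option names come from the PluginTask interface in OrcOutputPlugin below, while the paths and values are illustrative:

out:
  type: orc
  path_prefix: /tmp/example/out
  file_ext: .orc
  sequence_format: ".%03d"
  overwrite: false
  default_from_timezone: UTC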
data/src/main/java/org/embulk/output/orc/OrcColumnVisitor.java ADDED
@@ -0,0 +1,68 @@
+ package org.embulk.output.orc;
+
+ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.ColumnVisitor;
+ import org.embulk.spi.Page;
+ import org.embulk.spi.PageReader;
+
+ public class OrcColumnVisitor implements ColumnVisitor
+ {
+     private final PageReader reader;
+     private final VectorizedRowBatch batch;
+     private final Integer finalI;
+
+     public OrcColumnVisitor(PageReader pageReader, VectorizedRowBatch rowBatch, Page page, Integer i)
+     {
+         this.reader = pageReader;
+         this.batch = rowBatch;
+         this.finalI = i;
+     }
+
+     @Override
+     public void booleanColumn(Column column)
+     {
+         if (reader.isNull(column)) {
+             ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = 0;
+         }
+         else {
+             // ORC stores booleans as 0/1 in a LongColumnVector; read the value
+             // with getBoolean(), not getLong(), for boolean columns
+             ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = reader.getBoolean(column) ? 1 : 0;
+         }
+     }
+
+     @Override
+     public void longColumn(Column column)
+     {
+         ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = reader.getLong(column);
+     }
+
+     @Override
+     public void doubleColumn(Column column)
+     {
+         ((DoubleColumnVector) batch.cols[column.getIndex()]).vector[finalI] = reader.getDouble(column);
+     }
+
+     @Override
+     public void stringColumn(Column column)
+     {
+         ((BytesColumnVector) batch.cols[column.getIndex()]).setVal(finalI,
+                 reader.getString(column).getBytes());
+     }
+
+     @Override
+     public void timestampColumn(Column column)
+     {
+         // not implemented here; timestamps are handled by the inline visitor in OrcOutputPlugin
+     }
+
+     @Override
+     public void jsonColumn(Column column)
+     {
+         throw new UnsupportedOperationException("orc output plugin does not support json type");
+     }
+ }
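
For orientation, here is a sketch of how such a visitor is driven, mirroring the per-record loop in OrcTransactionalPageOutput.add() below; the variables schema (the Embulk Schema), orcSchema (the ORC TypeDescription), and page are assumed to be in scope:

// minimal driving loop (sketch; variable names are illustrative)
PageReader reader = new PageReader(schema);
VectorizedRowBatch batch = orcSchema.createRowBatch();

reader.setPage(page);
int row = 0;
while (reader.nextRecord()) {
    // visits every column of the current record, filling row `row` of the batch
    schema.visitColumns(new OrcColumnVisitor(reader, batch, page, row));
    row++;
}
batch.size = row; // rows actually populated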
data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java ADDED
@@ -0,0 +1,341 @@
+ package org.embulk.output.orc;
+
+ import com.google.common.base.Optional;
+ import com.google.common.base.Throwables;
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.LocalFileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.hdfs.DistributedFileSystem;
+ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+ import org.apache.hadoop.util.VersionInfo;
+ import org.apache.orc.CompressionKind;
+ import org.apache.orc.OrcFile;
+ import org.apache.orc.TypeDescription;
+ import org.apache.orc.Writer;
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.config.Task;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.ColumnVisitor;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.OutputPlugin;
+ import org.embulk.spi.Page;
+ import org.embulk.spi.PageReader;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.TransactionalPageOutput;
+ import org.embulk.spi.time.Timestamp;
+ import org.embulk.spi.time.TimestampFormatter;
+ import org.embulk.spi.type.Type;
+ import org.embulk.spi.util.Timestamps;
+ import org.joda.time.DateTimeZone;
+ import org.joda.time.format.DateTimeFormat;
+ import org.joda.time.format.DateTimeFormatter;
+
+ import java.io.IOException;
+ import java.util.List;
+ import java.util.Map;
+
+ public class OrcOutputPlugin
+         implements OutputPlugin
+ {
+     public interface PluginTask
+             extends Task, TimestampFormatter.Task
+     {
+         @Config("path_prefix")
+         String getPathPrefix();
+
+         @Config("file_ext")
+         @ConfigDefault("\".orc\"")
+         String getFileNameExtension();
+
+         @Config("column_options")
+         @ConfigDefault("{}")
+         Map<String, TimestampColumnOption> getColumnOptions();
+
+         @Config("sequence_format")
+         @ConfigDefault("\".%03d\"")
+         String getSequenceFormat();
+
+         @Config("overwrite")
+         @ConfigDefault("false")
+         boolean getOverwrite();
+
+         @Config("default_from_timezone")
+         @ConfigDefault("\"UTC\"")
+         DateTimeZone getDefaultFromTimeZone();
+     }
+
+     public interface TimestampColumnOption
+             extends Task, TimestampFormatter.TimestampColumnOption
+     {
+         @Config("from_timezone")
+         @ConfigDefault("null")
+         Optional<DateTimeZone> getFromTimeZone();
+
+         @Config("from_format")
+         @ConfigDefault("null")
+         Optional<List<String>> getFromFormat();
+     }
+
+     @Override
+     public ConfigDiff transaction(ConfigSource config,
+             Schema schema, int taskCount,
+             OutputPlugin.Control control)
+     {
+         PluginTask task = config.loadConfig(PluginTask.class);
+
+         // retryable (idempotent) output:
+         // return resume(task.dump(), schema, taskCount, control);
+
+         // non-retryable (non-idempotent) output:
+         control.run(task.dump());
+         return Exec.newConfigDiff();
+     }
+
+     @Override
+     public ConfigDiff resume(TaskSource taskSource,
+             Schema schema, int taskCount,
+             OutputPlugin.Control control)
+     {
+         throw new UnsupportedOperationException("orc output plugin does not support resuming");
+     }
+
+     @Override
+     public void cleanup(TaskSource taskSource,
+             Schema schema, int taskCount,
+             List<TaskReport> successTaskReports)
+     {
+     }
+
+     @Override
+     public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
+     {
+         PluginTask task = taskSource.loadTask(PluginTask.class);
+
+         final PageReader reader = new PageReader(schema);
+         Writer writer = createWriter(task, schema, taskIndex);
+
+         return new OrcTransactionalPageOutput(reader, writer, task);
+     }
+
+     private String buildPath(PluginTask task, int processorIndex)
+     {
+         final String pathPrefix = task.getPathPrefix();
+         final String pathSuffix = task.getFileNameExtension();
+         final String sequenceFormat = task.getSequenceFormat();
+         return pathPrefix + String.format(sequenceFormat, processorIndex) + pathSuffix;
+     }
+
+     private TypeDescription getSchema(Schema schema)
+     {
+         TypeDescription oschema = TypeDescription.createStruct();
+         for (int i = 0; i < schema.size(); i++) {
+             Column column = schema.getColumn(i);
+             Type type = column.getType();
+             switch (type.getName()) {
+                 case "long":
+                     oschema.addField(column.getName(), TypeDescription.createLong());
+                     break;
+                 case "double":
+                     oschema.addField(column.getName(), TypeDescription.createDouble());
+                     break;
+                 case "boolean":
+                     oschema.addField(column.getName(), TypeDescription.createBoolean());
+                     break;
+                 case "string":
+                     oschema.addField(column.getName(), TypeDescription.createString());
+                     break;
+                 case "timestamp":
+                     oschema.addField(column.getName(), TypeDescription.createTimestamp());
+                     break;
+                 default:
+                     System.out.println("Unsupported type: " + type.getName());
+                     break;
+             }
+         }
+         return oschema;
+     }
+
+     private Configuration getHadoopConfiguration()
+     {
+         Configuration conf = new Configuration();
+
+         // see: https://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
+         conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
+         conf.set("fs.file.impl", LocalFileSystem.class.getName());
+         // see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
+
+         return conf;
+     }
+
+     private Writer createWriter(PluginTask task, Schema schema, int processorIndex)
+     {
+         final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
+
+         Configuration conf = getHadoopConfiguration();
+         TypeDescription oschema = getSchema(schema);
+
+         // see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
+         Thread.currentThread().setContextClassLoader(VersionInfo.class.getClassLoader());
+
+         Writer writer = null;
+         try {
+             // see: https://stackoverflow.com/questions/9256733/how-to-connect-hive-in-ireport
+             // see: https://community.hortonworks.com/content/kbentry/73458/connecting-dbvisualizer-and-datagrip-to-hive-with.html
+             writer = OrcFile.createWriter(new Path(buildPath(task, processorIndex)),
+                     OrcFile.writerOptions(conf)
+                             .setSchema(oschema)
+                             .compress(CompressionKind.ZLIB)
+                             .version(OrcFile.Version.V_0_12));
+         }
+         catch (IOException e) {
+             e.printStackTrace();
+         }
+         return writer;
+     }
+
+     class OrcTransactionalPageOutput
+             implements TransactionalPageOutput
+     {
+         private final PageReader reader;
+         private Writer writer;
+         private final DateTimeFormatter formatter;
+
+         public OrcTransactionalPageOutput(PageReader reader, Writer writer, PluginTask task)
+         {
+             this.reader = reader;
+             this.writer = writer;
+
+             // formatter
+             DateTimeZone defaultTimeZone = DateTimeZone.forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
+             formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(defaultTimeZone);
+         }
+
+         @Override
+         public void add(Page page)
+         {
+             TypeDescription schema = getSchema(reader.getSchema());
+             VectorizedRowBatch batch = schema.createRowBatch();
+             // NOTE: assumes a page never holds more rows than the default batch capacity (1024)
+
+             reader.setPage(page);
+             int i = 0;
+             while (reader.nextRecord()) {
+                 final int finalI = i;
+
+                 reader.getSchema().visitColumns(new ColumnVisitor()
+                 {
+                     @Override
+                     public void booleanColumn(Column column)
+                     {
+                         if (reader.isNull(column)) {
+                             ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = 0;
+                         }
+                         else {
+                             // booleans are written as 0/1 into a LongColumnVector
+                             if (reader.getBoolean(column)) {
+                                 ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = 1;
+                             }
+                             else {
+                                 ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = 0;
+                             }
+                         }
+                     }
+
+                     @Override
+                     public void longColumn(Column column)
+                     {
+                         ((LongColumnVector) batch.cols[column.getIndex()]).vector[finalI] = reader.getLong(column);
+                     }
+
+                     @Override
+                     public void doubleColumn(Column column)
+                     {
+                         ((DoubleColumnVector) batch.cols[column.getIndex()]).vector[finalI] = reader.getDouble(column);
+                     }
+
+                     @Override
+                     public void stringColumn(Column column)
+                     {
+                         ((BytesColumnVector) batch.cols[column.getIndex()]).setVal(finalI,
+                                 reader.getString(column).getBytes());
+                     }
+
+                     @Override
+                     public void timestampColumn(Column column)
+                     {
+                         if (reader.isNull(column)) {
+                             ((TimestampColumnVector) batch.cols[column.getIndex()]).setNullValue(finalI);
+                         }
+                         else {
+                             Timestamp timestamp = reader.getTimestamp(column);
+                             java.sql.Timestamp ts = new java.sql.Timestamp(timestamp.getEpochSecond() * 1000);
+                             ts.setNanos(timestamp.getNano()); // keep sub-second precision
+                             ((TimestampColumnVector) batch.cols[column.getIndex()]).set(finalI, ts);
+                         }
+                     }
+
+                     @Override
+                     public void jsonColumn(Column column)
+                     {
+                         throw new UnsupportedOperationException("orc output plugin does not support json type");
+                     }
+                 });
+                 i++;
+             }
+             batch.size = i; // number of records actually read, not the page's string-reference count
+             try {
+                 writer.addRowBatch(batch);
+             }
+             catch (IOException e) {
+                 e.printStackTrace();
+             }
+         }
+
+         @Override
+         public void finish()
+         {
+             try {
+                 writer.close();
+                 writer = null;
+             }
+             catch (IOException e) {
+                 throw Throwables.propagate(e);
+             }
+         }
+
+         @Override
+         public void close()
+         {
+             // TODO: something
+         }
+
+         @Override
+         public void abort()
+         {
+             // TODO: something
+         }
+
+         @Override
+         public TaskReport commit()
+         {
+             // TODO: something
+             return Exec.newTaskReport();
+         }
+     }
+ }
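
For reference, here is a minimal self-contained sketch of the ORC write path the plugin builds on (TypeDescription, VectorizedRowBatch, OrcFile.createWriter, addRowBatch, close). The output path, column layout, and row values are illustrative, not taken from the plugin:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcWriteSketch
{
    public static void main(String[] args) throws Exception
    {
        // same struct-building API as OrcOutputPlugin.getSchema()
        TypeDescription schema = TypeDescription.createStruct()
                .addField("id", TypeDescription.createLong())
                .addField("name", TypeDescription.createString());

        Writer writer = OrcFile.createWriter(new Path("/tmp/sketch.orc"),
                OrcFile.writerOptions(new Configuration())
                        .setSchema(schema)
                        .compress(CompressionKind.ZLIB));

        // fill one batch: one column vector per field, one slot per row
        VectorizedRowBatch batch = schema.createRowBatch();
        for (int row = 0; row < 3; row++) {
            ((LongColumnVector) batch.cols[0]).vector[row] = row;
            byte[] name = ("row-" + row).getBytes(StandardCharsets.UTF_8);
            ((BytesColumnVector) batch.cols[1]).setVal(row, name);
        }
        batch.size = 3; // number of populated rows

        writer.addRowBatch(batch);
        writer.close();
    }
}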