embulk-formatter-avro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +51 -0
  5. data/build.gradle +97 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/config.yml +34 -0
  9. data/example/sample_01.csv +7 -0
  10. data/example/schema.avsc +33 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +160 -0
  14. data/gradlew.bat +90 -0
  15. data/lib/embulk/formatter/avro.rb +3 -0
  16. data/src/main/java/org/embulk/formatter/avro/AvroFormatterColumnVisitor.java +97 -0
  17. data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +183 -0
  18. data/src/main/java/org/embulk/formatter/avro/converter/AbstractAvroValueConverter.java +36 -0
  19. data/src/main/java/org/embulk/formatter/avro/converter/AvroArrayConverter.java +52 -0
  20. data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +14 -0
  21. data/src/main/java/org/embulk/formatter/avro/converter/AvroDoubleConverter.java +29 -0
  22. data/src/main/java/org/embulk/formatter/avro/converter/AvroEnumConverter.java +25 -0
  23. data/src/main/java/org/embulk/formatter/avro/converter/AvroFixedConverter.java +16 -0
  24. data/src/main/java/org/embulk/formatter/avro/converter/AvroFloatConverter.java +29 -0
  25. data/src/main/java/org/embulk/formatter/avro/converter/AvroIntConverter.java +29 -0
  26. data/src/main/java/org/embulk/formatter/avro/converter/AvroLongConverter.java +29 -0
  27. data/src/main/java/org/embulk/formatter/avro/converter/AvroMapConverter.java +52 -0
  28. data/src/main/java/org/embulk/formatter/avro/converter/AvroNullConverter.java +40 -0
  29. data/src/main/java/org/embulk/formatter/avro/converter/AvroRecordConverter.java +59 -0
  30. data/src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java +41 -0
  31. data/src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java +52 -0
  32. data/src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java +5 -0
  33. metadata +111 -0
@@ -0,0 +1,7 @@
1
+ id,account,time,purchase,comment,data,data2,type,md5
2
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9
3
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL
4
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll
5
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29
6
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL
7
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL
@@ -0,0 +1,33 @@
1
+ {
2
+ "type" : "record",
3
+ "name" : "Item",
4
+ "namespace" : "example.avro",
5
+ "fields" : [
6
+ {"name": "id", "type": "long"},
7
+ {"name": "account", "type": "long"},
8
+ {"name": "time", "type": "string"},
9
+ {"name": "purchase", "type": "string"},
10
+ {"name": "comment", "type": ["string", "null"]},
11
+ {"name": "data", "type": [
12
+ {
13
+ "type": "record",
14
+ "name": "data_record",
15
+ "fields": [
16
+ {"name": "foo", "type": ["null", "string"]},
17
+ {"name": "events", "type": ["null", {"type": "array", "items": {"type": "map", "values": "string"}}]}
18
+ ]
19
+ }, "null"]
20
+ },
21
+ {"name": "data2", "type": {
22
+ "type": "array", "items": {
23
+ "type": "record",
24
+ "name": "data2_record",
25
+ "fields": [
26
+ {"name": "hoge", "type": ["null", "int"]}
27
+ ]
28
+ }
29
+ }},
30
+ {"name": "type", "type": {"type": "enum", "name": "type_enum", "symbols": ["A", "B", "C"]}},
31
+ {"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]}
32
+ ]
33
+ }
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Jan 13 12:41:02 JST 2016
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
data/gradlew ADDED
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) { # Echo all arguments, joined into one string, to stdout.
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) { # Echo the arguments framed by blank lines, then exit the script with status 1.
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # Attempt to set APP_HOME
46
+ # Resolve links: $0 may be a link
47
+ PRG="$0"
48
+ # Need this for relative symlinks.
49
+ while [ -h "$PRG" ] ; do
50
+ ls=`ls -ld "$PRG"`
51
+ link=`expr "$ls" : '.*-> \(.*\)$'`
52
+ if expr "$link" : '/.*' > /dev/null; then
53
+ PRG="$link"
54
+ else
55
+ PRG=`dirname "$PRG"`"/$link"
56
+ fi
57
+ done
58
+ SAVED="`pwd`"
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
60
+ APP_HOME="`pwd -P`"
61
+ cd "$SAVED" >/dev/null
62
+
63
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
+
65
+ # Determine the Java command to use to start the JVM.
66
+ if [ -n "$JAVA_HOME" ] ; then
67
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68
+ # IBM's JDK on AIX uses strange locations for the executables
69
+ JAVACMD="$JAVA_HOME/jre/sh/java"
70
+ else
71
+ JAVACMD="$JAVA_HOME/bin/java"
72
+ fi
73
+ if [ ! -x "$JAVACMD" ] ; then
74
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75
+
76
+ Please set the JAVA_HOME variable in your environment to match the
77
+ location of your Java installation."
78
+ fi
79
+ else
80
+ JAVACMD="java"
81
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82
+
83
+ Please set the JAVA_HOME variable in your environment to match the
84
+ location of your Java installation."
85
+ fi
86
+
87
+ # Increase the maximum file descriptors if we can.
88
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
89
+ MAX_FD_LIMIT=`ulimit -H -n`
90
+ if [ $? -eq 0 ] ; then
91
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
92
+ MAX_FD="$MAX_FD_LIMIT"
93
+ fi
94
+ ulimit -n $MAX_FD
95
+ if [ $? -ne 0 ] ; then
96
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
97
+ fi
98
+ else
99
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100
+ fi
101
+ fi
102
+
103
+ # For Darwin, add options to specify how the application appears in the dock
104
+ if $darwin; then
105
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106
+ fi
107
+
108
+ # For Cygwin, switch paths to Windows format before running java
109
+ if $cygwin ; then
110
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
113
+
114
+ # We build the pattern for arguments to be converted via cygpath
115
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116
+ SEP=""
117
+ for dir in $ROOTDIRSRAW ; do
118
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
119
+ SEP="|"
120
+ done
121
+ OURCYGPATTERN="(^($ROOTDIRS))"
122
+ # Add a user-defined pattern to the cygpath arguments
123
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125
+ fi
126
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
127
+ i=0
128
+ for arg in "$@" ; do
129
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131
+
132
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134
+ else
135
+ eval `echo args$i`="\"$arg\""
136
+ fi
137
+ i=$((i+1))
138
+ done
139
+ case $i in
140
+ (0) set -- ;;
141
+ (1) set -- "$args0" ;;
142
+ (2) set -- "$args0" "$args1" ;;
143
+ (3) set -- "$args0" "$args1" "$args2" ;;
144
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150
+ esac
151
+ fi
152
+
153
+ # Split up the DEFAULT_JVM_OPTS, JAVA_OPTS and GRADLE_OPTS values into an array, following the shell quoting and substitution rules
154
+ function splitJvmOpts() { # Store every argument as one element of the JVM_OPTS array.
155
+ JVM_OPTS=("$@")
156
+ }
157
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
+
160
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
# Register the Java-implemented formatter under the name "avro".
# The classpath directory is resolved relative to this file (four path
# segments up, then "classpath").
plugin_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_formatter(
  "avro",
  "org.embulk.formatter.avro.AvroFormatterPlugin",
  plugin_classpath
)
@@ -0,0 +1,97 @@
1
+ package org.embulk.formatter.avro;
2
+
3
+ import org.apache.avro.generic.GenericRecord;
4
+ import org.embulk.formatter.avro.converter.AbstractAvroValueConverter;
5
+ import org.embulk.spi.Column;
6
+ import org.embulk.spi.ColumnVisitor;
7
+ import org.embulk.spi.PageReader;
8
+ import org.embulk.spi.time.Timestamp;
9
+ import org.embulk.spi.time.TimestampFormatter;
10
+ import org.msgpack.value.Value;
11
+
12
+ public class AvroFormatterColumnVisitor implements ColumnVisitor {
13
+ private PageReader pageReader;
14
+ private TimestampFormatter[] timestampFormatters;
15
+ private AbstractAvroValueConverter[] avroValueConverters;
16
+ private GenericRecord record;
17
+
18
+ AvroFormatterColumnVisitor(PageReader pageReader, TimestampFormatter[] timestampFormatters, AbstractAvroValueConverter[] avroValueConverters, GenericRecord record) {
19
+ this.pageReader = pageReader;
20
+ this.timestampFormatters = timestampFormatters;
21
+ this.avroValueConverters = avroValueConverters;
22
+ this.record = record;
23
+ }
24
+
25
+ @Override
26
+ public void booleanColumn(Column column) {
27
+ if (pageReader.isNull(column))
28
+ return;
29
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
30
+ if (converter == null)
31
+ return;
32
+ Boolean value = pageReader.getBoolean(column);
33
+ Object result = converter.booleanColumn(value);
34
+ record.put(column.getName(), result);
35
+ }
36
+
37
+ @Override
38
+ public void longColumn(Column column) {
39
+ if (pageReader.isNull(column))
40
+ return;
41
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
42
+ if (converter == null)
43
+ return;
44
+ Long value = pageReader.getLong(column);
45
+ Object result = converter.longColumn(value);
46
+ record.put(column.getName(), result);
47
+ }
48
+
49
+ @Override
50
+ public void doubleColumn(Column column) {
51
+ if (pageReader.isNull(column))
52
+ return;
53
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
54
+ if (converter == null)
55
+ return;
56
+ Double value = pageReader.getDouble(column);
57
+ Object result = converter.doubleColumn(value);
58
+ record.put(column.getName(), result);
59
+ }
60
+
61
+ @Override
62
+ public void stringColumn(Column column) {
63
+ if (pageReader.isNull(column))
64
+ return;
65
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
66
+ if (converter == null)
67
+ return;
68
+ String value = pageReader.getString(column);
69
+ Object result = converter.stringColumn(value);
70
+ record.put(column.getName(), result);
71
+ }
72
+
73
+ @Override
74
+ public void timestampColumn(Column column) {
75
+ if (pageReader.isNull(column))
76
+ return;
77
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
78
+ if (converter == null)
79
+ return;
80
+ Timestamp value = pageReader.getTimestamp(column);
81
+ String formatted = timestampFormatters[column.getIndex()].format(value);
82
+ Object result = converter.timestampColumn(formatted);
83
+ record.put(column.getName(), result);
84
+ }
85
+
86
+ @Override
87
+ public void jsonColumn(Column column) {
88
+ if (pageReader.isNull(column))
89
+ return;
90
+ AbstractAvroValueConverter converter = avroValueConverters[column.getIndex()];
91
+ if (converter == null)
92
+ return;
93
+ Value value = pageReader.getJson(column);
94
+ Object result = converter.jsonColumn(value);
95
+ record.put(column.getName(), result);
96
+ }
97
+ }
@@ -0,0 +1,183 @@
1
+ package org.embulk.formatter.avro;
2
+
3
+ import org.apache.avro.file.DataFileWriter;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericDatumWriter;
6
+ import org.apache.avro.generic.GenericRecord;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.config.ConfigInject;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.Task;
13
+ import org.embulk.config.TaskSource;
14
+ import org.embulk.formatter.avro.converter.AbstractAvroValueConverter;
15
+ import org.embulk.formatter.avro.converter.AvroValueConverterFactory;
16
+ import org.embulk.spi.BufferAllocator;
17
+ import org.embulk.spi.Column;
18
+ import org.embulk.spi.Exec;
19
+ import org.embulk.spi.FileOutput;
20
+ import org.embulk.spi.FormatterPlugin;
21
+ import org.embulk.spi.Page;
22
+ import org.embulk.spi.PageOutput;
23
+ import org.embulk.spi.PageReader;
24
+ import org.embulk.spi.Schema;
25
+ import org.embulk.spi.time.TimestampFormatter;
26
+ import org.embulk.spi.unit.LocalFile;
27
+ import org.embulk.spi.util.FileOutputOutputStream;
28
+ import org.embulk.spi.util.Timestamps;
29
+ import org.slf4j.Logger;
30
+
31
+ import java.io.File;
32
+ import java.io.IOException;
33
+ import java.util.ArrayList;
34
+ import java.util.List;
35
+ import java.util.Map;
36
+
37
+ public class AvroFormatterPlugin
38
+ implements FormatterPlugin
39
+ {
40
+ public interface PluginTask
41
+ extends Task, TimestampFormatter.Task
42
+ {
43
+ @Config("avsc")
44
+ LocalFile getAvsc();
45
+
46
+ @Config("column_options")
47
+ @ConfigDefault("{}")
48
+ Map<String, TimestampFormatter.TimestampColumnOption> getColumnOptions();
49
+
50
+ @Config("skip_error_record")
51
+ @ConfigDefault("false")
52
+ Boolean getSkipErrorRecord();
53
+
54
+ @ConfigInject
55
+ public BufferAllocator getBufferAllocator();
56
+ }
57
+
58
+ @Override
59
+ public void transaction(ConfigSource config, Schema schema,
60
+ FormatterPlugin.Control control)
61
+ {
62
+ PluginTask task = config.loadConfig(PluginTask.class);
63
+
64
+ // validate avsc option
65
+ try {
66
+ File avsc = task.getAvsc().getFile();
67
+ new org.apache.avro.Schema.Parser().parse(avsc);
68
+ } catch (IOException e) {
69
+ throw new ConfigException("avsc file is not found");
70
+ }
71
+
72
+
73
+ // validate column_options
74
+ for (String columnName : task.getColumnOptions().keySet()) {
75
+ schema.lookupColumn(columnName); // throws SchemaConfigException
76
+ }
77
+
78
+ control.run(task.dump());
79
+ }
80
+
81
+ final Logger logger = Exec.getLogger(this.getClass());
82
+
83
+ @Override
84
+ public PageOutput open(TaskSource taskSource, final Schema schema,
85
+ FileOutput output)
86
+ {
87
+ PluginTask task = taskSource.loadTask(PluginTask.class);
88
+
89
+ final Boolean skipErrorRecord = task.getSkipErrorRecord();
90
+ final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
91
+ final FileOutputOutputStream stream = new FileOutputOutputStream(output, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.CLOSE);
92
+
93
+ final org.apache.avro.Schema avroSchema;
94
+ final DataFileWriter<GenericRecord> writer;
95
+ try {
96
+ File avsc = task.getAvsc().getFile();
97
+ avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
98
+ GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
99
+ writer = new DataFileWriter<>(datumWriter);
100
+ stream.nextFile();
101
+ writer.create(avroSchema, stream);
102
+ } catch (IOException e) {
103
+ throw new ConfigException("avsc file is not found");
104
+ }
105
+
106
+ final AbstractAvroValueConverter[] avroValueConverters = new AbstractAvroValueConverter[schema.size()];
107
+ List<AbstractAvroValueConverter> array = new ArrayList<>();
108
+ for (Column c : schema.getColumns()) {
109
+ org.apache.avro.Schema.Field field = avroSchema.getField(c.getName());
110
+ if (field != null) {
111
+ avroValueConverters[c.getIndex()] = AvroValueConverterFactory.createConverter(field);
112
+ }
113
+ }
114
+
115
+ return new PageOutput() {
116
+ private final PageReader pageReader = new PageReader(schema);
117
+
118
+ @Override
119
+ public void add(Page page) {
120
+ pageReader.setPage(page);
121
+
122
+ while (pageReader.nextRecord()) {
123
+ GenericRecord record = new GenericData.Record(avroSchema);
124
+
125
+ try {
126
+ schema.visitColumns(new AvroFormatterColumnVisitor(pageReader, timestampFormatters, avroValueConverters, record));
127
+ } catch (RuntimeException ex) {
128
+ if (skipErrorRecord) {
129
+ logger.warn(ex.getMessage());
130
+ logger.warn(String.format("skip record: %s", record));
131
+ continue;
132
+ } else {
133
+ throw ex;
134
+ }
135
+ }
136
+
137
+ try {
138
+ writer.append(record);
139
+ } catch (RuntimeException ex) {
140
+ if (skipErrorRecord) {
141
+ logger.warn(ex.getMessage());
142
+ logger.warn(String.format("skip record: %s", record));
143
+ } else {
144
+ throw ex;
145
+ }
146
+ } catch (IOException e) {
147
+ e.printStackTrace();
148
+ throw new RuntimeException("failed to write");
149
+ }
150
+ }
151
+
152
+ try {
153
+ writer.flush();
154
+ } catch (IOException e) {
155
+ e.printStackTrace();
156
+ throw new RuntimeException("failed to write");
157
+ }
158
+ }
159
+
160
+ @Override
161
+ public void finish() {
162
+ try {
163
+ writer.flush();
164
+ } catch (IOException e) {
165
+ e.printStackTrace();
166
+ throw new RuntimeException("failed to write");
167
+ }
168
+ stream.finish();
169
+ }
170
+
171
+ @Override
172
+ public void close() {
173
+ try {
174
+ writer.close();
175
+ } catch (IOException e) {
176
+ e.printStackTrace();
177
+ throw new RuntimeException("failed to write");
178
+ }
179
+ stream.close();
180
+ }
181
+ };
182
+ }
183
+ }