embulk-parser-avro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +88 -0
  5. data/build.gradle +96 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/.gitignore +3 -0
  9. data/example/example.yml +22 -0
  10. data/example/generate.rb +94 -0
  11. data/example/item.avsc +27 -0
  12. data/example/items.avro +0 -0
  13. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  14. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  15. data/gradlew +160 -0
  16. data/gradlew.bat +90 -0
  17. data/lib/embulk/guess/avro.rb +61 -0
  18. data/lib/embulk/parser/avro.rb +3 -0
  19. data/src/main/java/org/embulk/parser/avro/AvroColumnOption.java +16 -0
  20. data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java +93 -0
  21. data/src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java +72 -0
  22. data/src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java +83 -0
  23. data/src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java +37 -0
  24. data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java +63 -0
  25. data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java +58 -0
  26. data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java +58 -0
  27. data/src/main/java/org/embulk/parser/avro/getter/GenericDataColumnGetter.java +34 -0
  28. data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java +57 -0
  29. data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java +58 -0
  30. data/src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java +85 -0
  31. data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java +145 -0
  32. data/src/test/resources/org/embulk/parser/avro/item.avsc +27 -0
  33. data/src/test/resources/org/embulk/parser/avro/items.avro +0 -0
  34. metadata +112 -0
@@ -0,0 +1,3 @@
1
+ Gemfile
2
+ Gemfile.lock
3
+ .bundle
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: "items"
4
+ parser:
5
+ type: avro
6
+ avsc : "./item.avsc"
7
+ columns:
8
+ - {name: "id", type: "long"}
9
+ - {name: "code", type: "string"}
10
+ - {name: "name", type: "string"}
11
+ - {name: "description", type: "string"}
12
+ - {name: "flag", type: "boolean"}
13
+ - {name: "price", type: "long"}
14
+ - {name: "item_type", type: "string"}
15
+ - {name: "tags", type: "json"}
16
+ - {name: "options", type: "json"}
17
+ - {name: "spec", type: "json"}
18
+ - {name: "created_at", type: "timestamp", format: "%Y-%m-%dT%H:%M:%S%:z"}
19
+ - {name: "created_at_utc", type: "timestamp"}
20
+
21
+ out:
22
+ type: stdout
@@ -0,0 +1,94 @@
1
+ require 'avro'
2
+ require 'time'
3
+
4
+ file = File.open('items.avro', 'wb')
5
+
6
+ schema = Avro::Schema.parse(File.open("item.avsc", "rb").read)
7
+
8
+ writer = Avro::IO::DatumWriter.new(schema)
9
+
10
+ dw = Avro::DataFile::Writer.new(file, writer, schema)
11
+
12
+ dw << {
13
+ "id" => 1,
14
+ "code" => 123456789012345678,
15
+ "name" => "Desktop",
16
+ "description" => "Office and Personal Usage",
17
+ "flag" => true,
18
+ "created_at" => Time.now.iso8601,
19
+ "created_at_utc" => Time.now.to_f,
20
+ "spec" => {"key" => "opt1", "value" => "optvalue1"},
21
+ "tags" => ["tag1", "tag2"],
22
+ "price" => 30000,
23
+ "options" => {"foo" => "bar", "hoge" => nil},
24
+ "item_type" => "D",
25
+ "dummy" => nil,
26
+ }
27
+ dw << {
28
+ "id" => 2,
29
+ "code" => 123456789012345679,
30
+ "name" => "Laptop",
31
+ "flag" => false,
32
+ "created_at" => Time.now.iso8601,
33
+ "created_at_utc" => Time.now.to_f,
34
+ "spec" => {"key" => "opt1", "value" => nil},
35
+ "price" => 50000,
36
+ "options" => {},
37
+ "item_type" => "M",
38
+ }
39
+ dw << {
40
+ "id" => 3,
41
+ "code" => 123456789012345680,
42
+ "name" => "Tablet",
43
+ "description" => "Personal Usage",
44
+ "flag" => true,
45
+ "created_at" => Time.now.iso8601,
46
+ "created_at_utc" => Time.now.to_f,
47
+ "tags" => ["tag3"],
48
+ "spec" => {"key" => "opt1", "value" => "optvalue1"},
49
+ "options" => {},
50
+ "item_type" => "M",
51
+ }
52
+ dw << {
53
+ "id" => 4,
54
+ "code" => 123456789012345681,
55
+ "name" => "Mobile",
56
+ "description" => "Personal Usage",
57
+ "flag" => true,
58
+ "created_at" => Time.now.iso8601,
59
+ "created_at_utc" => Time.now.to_f,
60
+ "spec" => {"key" => "opt1", "value" => "optvalue1"},
61
+ "tags" => [],
62
+ "price" => 10000,
63
+ "options" => {},
64
+ "item_type" => "M",
65
+ }
66
+ dw << {
67
+ "id" => 5,
68
+ "code" => 123456789012345682,
69
+ "name" => "Notepad",
70
+ "flag" => true,
71
+ "created_at" => Time.now.iso8601,
72
+ "created_at_utc" => Time.now.to_f,
73
+ "spec" => {"key" => "opt1", "value" => "optvalue1"},
74
+ "tags" => ["tag1", "tag2"],
75
+ "price" => 20000,
76
+ "options" => {},
77
+ "item_type" => "M",
78
+ }
79
+ dw << {
80
+ "id" => 6,
81
+ "code" => 123456789012345683,
82
+ "name" => "SmartPhone",
83
+ "description" => "Multipurpose",
84
+ "flag" => true,
85
+ "created_at" => Time.now.iso8601,
86
+ "created_at_utc" => Time.now.to_f,
87
+ "spec" => {"key" => "opt1", "value" => "optvalue1"},
88
+ "tags" => ["tag1", "tag2"],
89
+ "price" => 40000,
90
+ "options" => {},
91
+ "item_type" => "M",
92
+ }
93
+
94
+ dw.close
data/example/item.avsc ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "type" : "record",
3
+ "name" : "Item",
4
+ "namespace" : "example.avro",
5
+ "fields" : [
6
+ {"name": "id", "type": "int"},
7
+ {"name": "code", "type": "long"},
8
+ {"name": "name", "type": "string"},
9
+ {"name": "description", "type": ["string", "null"]},
10
+ {"name": "flag", "type": "boolean"},
11
+ {"name": "created_at", "type": "string"},
12
+ {"name": "created_at_utc", "type": "float"},
13
+ {"name": "price", "type": ["double", "null"]},
14
+ {"name": "spec", "type": {
15
+ "type": "record",
16
+ "name": "item_spec",
17
+ "fields" : [
18
+ {"name" : "key", "type" : "string"},
19
+ {"name" : "value", "type" : ["string", "null"]}
20
+ ]}
21
+ },
22
+ {"name": "tags", "type": [{"type": "array", "items": "string"}, "null"]},
23
+ {"name": "options", "type": {"type": "map", "values": ["string", "null"]}},
24
+ {"name": "item_type", "type": {"name": "item_type_enum", "type": "enum", "symbols": ["D", "M"]}},
25
+ {"name": "dummy", "type": "null"}
26
+ ]
27
+ }
Binary file
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Jan 13 12:41:02 JST 2016
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
data/gradlew ADDED
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # Attempt to set APP_HOME
46
+ # Resolve links: $0 may be a link
47
+ PRG="$0"
48
+ # Need this for relative symlinks.
49
+ while [ -h "$PRG" ] ; do
50
+ ls=`ls -ld "$PRG"`
51
+ link=`expr "$ls" : '.*-> \(.*\)$'`
52
+ if expr "$link" : '/.*' > /dev/null; then
53
+ PRG="$link"
54
+ else
55
+ PRG=`dirname "$PRG"`"/$link"
56
+ fi
57
+ done
58
+ SAVED="`pwd`"
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
60
+ APP_HOME="`pwd -P`"
61
+ cd "$SAVED" >/dev/null
62
+
63
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
+
65
+ # Determine the Java command to use to start the JVM.
66
+ if [ -n "$JAVA_HOME" ] ; then
67
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68
+ # IBM's JDK on AIX uses strange locations for the executables
69
+ JAVACMD="$JAVA_HOME/jre/sh/java"
70
+ else
71
+ JAVACMD="$JAVA_HOME/bin/java"
72
+ fi
73
+ if [ ! -x "$JAVACMD" ] ; then
74
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75
+
76
+ Please set the JAVA_HOME variable in your environment to match the
77
+ location of your Java installation."
78
+ fi
79
+ else
80
+ JAVACMD="java"
81
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82
+
83
+ Please set the JAVA_HOME variable in your environment to match the
84
+ location of your Java installation."
85
+ fi
86
+
87
+ # Increase the maximum file descriptors if we can.
88
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
89
+ MAX_FD_LIMIT=`ulimit -H -n`
90
+ if [ $? -eq 0 ] ; then
91
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
92
+ MAX_FD="$MAX_FD_LIMIT"
93
+ fi
94
+ ulimit -n $MAX_FD
95
+ if [ $? -ne 0 ] ; then
96
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
97
+ fi
98
+ else
99
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100
+ fi
101
+ fi
102
+
103
+ # For Darwin, add options to specify how the application appears in the dock
104
+ if $darwin; then
105
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106
+ fi
107
+
108
+ # For Cygwin, switch paths to Windows format before running java
109
+ if $cygwin ; then
110
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
113
+
114
+ # We build the pattern for arguments to be converted via cygpath
115
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116
+ SEP=""
117
+ for dir in $ROOTDIRSRAW ; do
118
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
119
+ SEP="|"
120
+ done
121
+ OURCYGPATTERN="(^($ROOTDIRS))"
122
+ # Add a user-defined pattern to the cygpath arguments
123
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125
+ fi
126
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
127
+ i=0
128
+ for arg in "$@" ; do
129
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131
+
132
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134
+ else
135
+ eval `echo args$i`="\"$arg\""
136
+ fi
137
+ i=$((i+1))
138
+ done
139
+ case $i in
140
+ (0) set -- ;;
141
+ (1) set -- "$args0" ;;
142
+ (2) set -- "$args0" "$args1" ;;
143
+ (3) set -- "$args0" "$args1" "$args2" ;;
144
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150
+ esac
151
+ fi
152
+
153
+ # Split up the DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS values into an array, following the shell quoting and substitution rules
154
+ function splitJvmOpts() {
155
+ JVM_OPTS=("$@")
156
+ }
157
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
+
160
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,61 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "avro" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ #class Avro < GuessPlugin
12
+ # Plugin.register_guess("avro", self)
13
+ #
14
+ # def guess(config, sample_buffer)
15
+ # if sample_buffer[0,2] == GZIP_HEADER
16
+ # guessed = {}
17
+ # guessed["type"] = "avro"
18
+ # guessed["property1"] = "guessed-value"
19
+ # return {"parser" => guessed}
20
+ # else
21
+ # return {}
22
+ # end
23
+ # end
24
+ #end
25
+
26
+ #class Avro < TextGuessPlugin
27
+ # Plugin.register_guess("avro", self)
28
+ #
29
+ # def guess_text(config, sample_text)
30
+ # js = JSON.parse(sample_text) rescue nil
31
+ # if js && js["mykeyword"] == "keyword"
32
+ # guessed = {}
33
+ # guessed["type"] = "avro"
34
+ # guessed["property1"] = "guessed-value"
35
+ # return {"parser" => guessed}
36
+ # else
37
+ # return {}
38
+ # end
39
+ # end
40
+ #end
41
+
42
+ #class Avro < LineGuessPlugin
43
+ # Plugin.register_guess("avro", self)
44
+ #
45
+ # def guess_lines(config, sample_lines)
46
+ # all_line_matched = sample_lines.all? do |line|
47
+ # line =~ /mypattern/
48
+ # end
49
+ # if all_line_matched
50
+ # guessed = {}
51
+ # guessed["type"] = "avro"
52
+ # guessed["property1"] = "guessed-value"
53
+ # return {"parser" => guessed}
54
+ # else
55
+ # return {}
56
+ # end
57
+ # end
58
+ #end
59
+
60
+ end
61
+ end