embulk-output-bigquery 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 27c54d556e2bb6e353099dd3cbee5ca5a446025f
4
+ data.tar.gz: b090539b1d1e91859ee38fead0df5ab78c2a1244
5
+ SHA512:
6
+ metadata.gz: acbd50a0d44feda6db33b6e617b350c77df7866b2d2dff854357d942ebdf2225a188cf5c79192eadc5e5c1be703310354ce42d76f092501e0e665caddfcad785
7
+ data.tar.gz: 303a52ae2ac6b10469a79f40582607ffdfaeba9849c7eac75b67f20714873c4986307a20000a047d03255fecd44515816fe30f0096a86b2aa965883ce294536c
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ target/
2
+ build/
3
+ pkg/
4
+ *.iml
5
+ *~
6
+ ._*
7
+ .idea
8
+ tmp/
9
+ vendor/
10
+ /classpath/
11
+ /.bundle
12
+ .yardoc
13
+ /embulk-*.jar
14
+ /.gradle
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,87 @@
1
+
2
+ # embulk-output-bigquery
3
+
4
+ [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) via [GCS(Google Cloud Storage)](https://cloud.google.com/storage/)
5
+
6
+ ## Overview
7
+
8
+ Load data into Google BigQuery as batch jobs via GCS, for large amounts of data
9
+ https://developers.google.com/bigquery/loading-data-into-bigquery
10
+
11
+ * **Plugin type**: output
12
+ * **Resume supported**: no
13
+ * **Cleanup supported**: no
14
+ * **Dynamic table creating**: todo
15
+
16
+ ### NOT IMPLEMENTED
17
+ * insert data over streaming inserts
18
+ * for continuous real-time insertions
19
+ * Please use another product, like [fluent-plugin-bigquery](https://github.com/kaizenplatform/fluent-plugin-bigquery)
20
+ * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
21
+
22
+ Current version of this plugin supports Google API with Service Account Authentication, but does not support
23
+ OAuth flow for installed applications.
24
+
25
+ ## Configuration
26
+
27
+ - **service_account_email**: your Google service account email (string, required)
28
+ - **p12_keyfile_path**: full path of the private key in P12(PKCS12) format (string, required)
29
+ - **path_prefix**: (string, required)
30
+ - **sequence_format**: (string, optional, default is %03d.%02d)
31
+ - **file_ext**: (string, required)
32
+ - **source_format**: file type (NEWLINE_DELIMITED_JSON or CSV) (string, required, default is CSV)
33
+ - **is_file_compressed**: upload file is gzip compressed or not. (boolean, optional, default is 1)
34
+ - **bucket**: Google Cloud Storage output bucket name (string, required)
35
+ - **remote_path**: folder name in GCS bucket (string, optional)
36
+ - **project**: project_id (string, required)
37
+ - **dataset**: dataset (string, required)
38
+ - **table**: table name (string, required)
39
+ - **application_name**: application name; anything you like (string, optional)
40
+ - **delete_from_local_when_upload_end**: (boolean, optional, default is 0)
41
+ - **delete_from_bucket_when_job_end**: (boolean, optional, default is 0)
42
+ - **job_status_max_polling_time**: max job status polling time. (int, optional, default is 3600 sec)
43
+ - **job_status_polling_interval**: job status polling interval. (int, optional, default is 10 sec)
44
+ - **is_skip_job_result_check**: (boolean, optional, default is 0)
45
+
46
+ ## Support for Google BigQuery Quota policy
47
+ embulk-output-bigquery supports the following [Google BigQuery Quota policy](https://cloud.google.com/bigquery/loading-data-into-bigquery#quota).
48
+
49
+ * Supported
50
+ * Maximum size per load job: 1TB across all input files
51
+ * Maximum number of files per load job: 10,000
52
+ * embulk-output-bigquery divides a file into more than one job, like below.
53
+ * job1: file1(1GB) file2(1GB)...file10(1GB)
54
+ * job2: file11(1GB) file12(1GB)
55
+
56
+ * Not Supported
57
+ * Daily limit: 1,000 load jobs per table per day (including failures)
58
+ * 10,000 load jobs per project per day (including failures)
59
+
60
+ ## Example
61
+
62
+ ```yaml
63
+ out:
64
+ type: bigquery
65
+ service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
66
+ p12_keyfile_path: /path/to/p12_keyfile.p12
67
+ path_prefix: /path/to/output
68
+ file_ext: csv.gz
69
+ source_format: CSV
70
+ is_file_compressed: 1
71
+ project: your-project-000
72
+ bucket: output_bucket_name
73
+ remote_path: folder_name
74
+ dataset: your_dataset_name
75
+ table: your_table_name
76
+ formatter:
77
+ type: csv
78
+ header_line: false
79
+ encoders:
80
+ - {type: gzip}
81
+ ```
82
+
83
+ ## Build
84
+
85
+ ```
86
+ $ ./gradlew gem
87
+ ```
data/build.gradle ADDED
@@ -0,0 +1,64 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ }
11
+ configurations {
12
+ provided
13
+ }
14
+
15
+ sourceCompatibility = 1.7
16
+ targetCompatibility = 1.7
17
+
18
+ version = "0.1.0"
19
+
20
+ dependencies {
21
+ compile "org.embulk:embulk-core:0.5.1"
22
+ provided "org.embulk:embulk-core:0.5.1"
23
+
24
+ compile "com.google.http-client:google-http-client-jackson2:1.19.0"
25
+ compile ("com.google.apis:google-api-services-storage:v1-rev27-1.19.1") {exclude module: "guava-jdk5"}
26
+ compile "com.google.apis:google-api-services-bigquery:v2-rev193-1.19.1"
27
+ compile "eu.medsea.mimeutil:mime-util:2.1.3"
28
+
29
+ testCompile "junit:junit:4.+"
30
+ }
31
+
32
+ task classpath(type: Copy, dependsOn: ["jar"]) {
33
+ doFirst { file("classpath").deleteDir() }
34
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
35
+ into "classpath"
36
+ }
37
+ clean { delete 'classpath' }
38
+
39
+ task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
40
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
41
+ script "build/gemspec"
42
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
43
+ }
44
+
45
+ task gemspec << { file("build/gemspec").write($/
46
+ Gem::Specification.new do |spec|
47
+ spec.name = "${project.name}"
48
+ spec.version = "${project.version}"
49
+ spec.authors = ["Satoshi Akama"]
50
+ spec.summary = %[Google BigQuery output plugin for Embulk]
51
+ spec.description = %[Embulk plugin that insert records to Google Bigquery.]
52
+ spec.email = ["satoshiakama@gmail.com"]
53
+ spec.licenses = ["Apache-2.0"]
54
+ spec.homepage = "https://github.com/sakama/embulk-output-bigquery"
55
+
56
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
57
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
58
+ spec.require_paths = ["lib"]
59
+
60
+ spec.add_development_dependency 'bundler', ['~> 1.0']
61
+ spec.add_development_dependency 'rake', ['>= 10.0']
62
+ end
63
+ /$)
64
+ }
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Feb 04 13:46:12 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega