embulk-input-gcs 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 82f47e1ec13ce8e83f892c49519930e578440200
4
+ data.tar.gz: 121ad589176d771d23435bbf914b3724337cab6a
5
+ SHA512:
6
+ metadata.gz: 9e37e6e60fe77027bf5b2d1929b2f3af9ebcdac22f8be31e30412db1e3c96e20c3fe7a9b2a6c5081d96f46dafcc924dbbc9f5fe25bbd505b4d1537d488a1c32f
7
+ data.tar.gz: ad9f4ec153c83e317e1f90e4dcf322eecb79ab44b8d4cc17709599b7559aac9edc07b7eb9335c34205464280099ebcb3ee5a94cda7ef536b78bba52628d92fba
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ target/
2
+ build/
3
+ pkg/
4
+ *.iml
5
+ *~
6
+ ._*
7
+ .idea
8
+ tmp/
9
+ vendor/
10
+ /classpath/
11
+ /.bundle
12
+ .yardoc
13
+ /embulk-*.jar
14
+ /.gradle
data/README.md ADDED
@@ -0,0 +1,89 @@
1
+ # Google Cloud Storage file input plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * Plugin type: **file input**
6
+ * Resume supported: **yes**
7
+ * Cleanup supported: **yes**
8
+
9
+ ## Usage
10
+
11
+ ### Install plugin
12
+
13
+ ```
14
+ embulk gem install embulk-input-gcs
15
+ ```
16
+
17
+ ### Google Service Account Settings
18
+ 1. Make project at [Google Developers Console](https://console.developers.google.com/project).
19
+
20
+ 1. Make "Service Account" with [this step](https://cloud.google.com/storage/docs/authentication#service_accounts).
21
+
22
+ Service Account has two specific scopes: read-only, read-write.
23
+
24
+ embulk-input-gcs only requires the "read-only" scope.
25
+
26
+ 1. Generate private key in P12(PKCS12) format, and upload to machine.
27
+
28
+ 1. Write "EMAIL_ADDRESS" and fullpath of PKCS12 private key in yaml.
29
+
30
+ ### run
31
+
32
+ ```
33
+ embulk run /path/to/config.yml
34
+ ```
35
+
36
+ ## Configuration
37
+
38
+ - **bucket** Google Cloud Storage bucket name (string, required)
39
+ - **path_prefix** prefix of target keys (string, required)
40
+ - **service_accound_email** Google Cloud Storage service_account_email (string, required)
41
+ - **p12_keyfile_fullpath** fullpath of p12 key (string, required)
42
+ - **application_name** application name anything you like (string, optional)
43
+
44
+ ## Example
45
+
46
+ ```yaml
47
+ in:
48
+   type: gcs
49
+   bucket: my-gcs-bucket
50
+   path_prefix: logs/csv-
51
+   service_accound_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
52
+   p12_keyfile_fullpath: /path/to/p12_keyfile.p12
53
+   application_name: Anything you like
54
+ ```
55
+
56
+ Example for "sample_01.csv.gz" , generated by [embulk example](https://github.com/embulk/embulk#trying-examples)
57
+
58
+ ```yaml
59
+ in:
60
+   type: gcs
61
+   bucket: my-gcs-bucket
62
+   path_prefix: sample_
63
+   service_accound_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
64
+   p12_keyfile_fullpath: /path/to/p12_keyfile.p12
65
+   application_name: Anything you like
66
+   decoders:
67
+   - {type: gzip}
68
+   parser:
69
+     charset: UTF-8
70
+     newline: CRLF
71
+     type: csv
72
+     delimiter: ','
73
+     quote: '"'
74
+     header_line: true
75
+     columns:
76
+     - {name: id, type: long}
77
+     - {name: account, type: long}
78
+     - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
79
+     - {name: purchase, type: timestamp, format: '%Y%m%d'}
80
+     - {name: comment, type: string}
81
+ out: {type: stdout}
82
+ ```
83
+
84
+ ## Build
85
+
86
+ ```
87
+ ./gradlew gem
88
+ ```
89
+
data/build.gradle ADDED
@@ -0,0 +1,59 @@
1
// Build script for the embulk-input-gcs plugin: compiles the Java sources, copies the
// runtime jars into ./classpath, generates a gemspec and packages everything as a gem.
plugins {
    id "com.jfrog.bintray" version "1.1"
    id "com.github.jruby-gradle.base" version "0.1.5"
    id "java"
}
import com.github.jrubygradle.JRubyExec
repositories {
    mavenCentral()
    jcenter()
}
configurations {
    // Dependencies listed here are subtracted from the runtime set in the "classpath" task
    // below, so they are not copied into the gem.
    provided
}

version = "0.1.0"

dependencies {
    compile  "org.embulk:embulk-core:0.4.10"
    provided "org.embulk:embulk-core:0.4.10"

    compile "com.google.http-client:google-http-client-jackson2:1.19.0"
    compile ("com.google.apis:google-api-services-storage:v1-rev27-1.19.1") {exclude module: "guava-jdk5"}

    testCompile "junit:junit:4.+"
}

// Copies the plugin jar plus its non-provided runtime dependencies into ./classpath,
// wiping any previous contents first.
task classpath(type: Copy, dependsOn: ["jar"]) {
    doFirst { file("classpath").deleteDir() }
    from (configurations.runtime - configurations.provided + files(jar.archivePath))
    into "classpath"
}
clean { delete 'classpath' }

// Runs "gem build build/gemspec" through JRuby and moves the resulting .gem file into ./pkg.
task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
    jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
    script "build/gemspec"
    doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
}

// Writes build/gemspec. The text between $/ and /$ is emitted verbatim (after Groovy
// ${...} interpolation) and is evaluated as Ruby by the "gem" task, so nothing is
// commented inside it.
task gemspec << { file("build/gemspec").write($/
Gem::Specification.new do |spec|
  spec.name          = "${project.name}"
  spec.version       = "${project.version}"
  spec.authors       = ["Satoshi Akama"]
  spec.summary       = %[Google Cloud Storage input plugin for Embulk]
  spec.description   = %[Embulk file input plugin that reads files stored on Google Cloud Storage so that any output plugin can receive the records. Search the output plugins by 'embulk-output' keyword.]
  spec.email         = ["satoshiakama@gmail.com"]
  spec.licenses      = ["Apache-2.0"]
  spec.homepage      = "https://github.com/sakama/embulk-input-gcs"

  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r"^(test|spec)/")
  spec.require_paths = ["lib"]

  spec.add_development_dependency 'bundler', ['~> 1.0']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
/$)
}
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Feb 04 13:46:12 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
# Registers the Java class org.embulk.input.gcs.GcsFileInputPlugin as Embulk input type :gcs.
# The jar directory is the "classpath" folder four path levels above this file.
gcs_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_input(:gcs, "org.embulk.input.gcs.GcsFileInputPlugin", gcs_classpath)
data/settings.gradle ADDED
@@ -0,0 +1,2 @@
1
+ rootProject.name = 'embulk-input-gcs'
2
+
@@ -0,0 +1,288 @@
1
+ package org.embulk.input.gcs;
2
+
3
+ import java.util.List;
4
+ import java.util.Arrays;
5
+ import java.util.ArrayList;
6
+ import java.util.Collections;
7
+ import java.io.File;
8
+ import java.io.IOException;
9
+ import java.io.InputStream;
10
+ import java.math.BigInteger;
11
+ import com.google.common.collect.ImmutableList;
12
+ import com.google.common.base.Optional;
13
+ import java.security.GeneralSecurityException;
14
+
15
+ import org.embulk.config.CommitReport;
16
+ import org.embulk.config.Config;
17
+ import org.embulk.config.ConfigInject;
18
+ import org.embulk.config.ConfigDiff;
19
+ import org.embulk.config.ConfigDefault;
20
+ import org.embulk.config.ConfigSource;
21
+ import org.embulk.config.Task;
22
+ import org.embulk.config.TaskSource;
23
+ import org.embulk.config.CommitReport;
24
+ import org.embulk.spi.Exec;
25
+ import org.embulk.spi.BufferAllocator;
26
+ import org.embulk.spi.FileInputPlugin;
27
+ import org.embulk.spi.TransactionalFileInput;
28
+ import org.embulk.spi.util.InputStreamFileInput;
29
+
30
+ import org.slf4j.Logger;
31
+
32
+ import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
33
+ import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
34
+ import com.google.api.client.http.HttpTransport;
35
+ import com.google.api.client.json.JsonFactory;
36
+ import com.google.api.client.json.jackson2.JacksonFactory;
37
+ import com.google.api.services.storage.Storage;
38
+ import com.google.api.services.storage.StorageScopes;
39
+ import com.google.api.services.storage.model.Bucket;
40
+ import com.google.api.services.storage.model.Objects;
41
+ import com.google.api.services.storage.model.StorageObject;
42
+
43
+ public class GcsFileInputPlugin
44
+ implements FileInputPlugin
45
+ {
46
+ public interface PluginTask
47
+ extends Task
48
+ {
49
+ @Config("bucket")
50
+ public String getBucket();
51
+
52
+ @Config("path_prefix")
53
+ public String getPathPrefix();
54
+
55
+ @Config("last_path")
56
+ @ConfigDefault("null")
57
+ public Optional<String> getLastPath();
58
+
59
+ @Config("service_accound_email")
60
+ public String getServiceAccountEmail();
61
+
62
+ @Config("application_name")
63
+ // @todo I want to set default applicaiton name like "embulk-input-gcs". But string format cause JsonParseException.
64
+ @ConfigDefault("1000000000")
65
+ public String getApplicationName();
66
+
67
+ @Config("p12_keyfile_fullpath")
68
+ public String getP12KeyfileFullpath();
69
+
70
+ public List<String> getFiles();
71
+ public void setFiles(List<String> files);
72
+
73
+ @ConfigInject
74
+ public BufferAllocator getBufferAllocator();
75
+ }
76
+
77
+ private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
78
+ private static HttpTransport httpTransport;
79
+ private static JsonFactory jsonFactory;
80
+
81
+ @Override
82
+ public ConfigDiff transaction(ConfigSource config,
83
+ FileInputPlugin.Control control)
84
+ {
85
+ final PluginTask task = config.loadConfig(PluginTask.class);
86
+
87
+ try {
88
+ httpTransport = GoogleNetHttpTransport.newTrustedTransport();
89
+ jsonFactory = new JacksonFactory();
90
+ } catch (Exception e) {
91
+ log.warn("Could not generate http transport");
92
+ }
93
+
94
+ // list files recursively
95
+ task.setFiles(listFiles(task));
96
+ // number of processors is same with number of files
97
+ return resume(task.dump(), task.getFiles().size(), control);
98
+ }
99
+
100
+ @Override
101
+ public ConfigDiff resume(TaskSource taskSource,
102
+ int taskCount,
103
+ FileInputPlugin.Control control)
104
+ {
105
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
106
+
107
+ control.run(taskSource, taskCount);
108
+
109
+ List<String> files = new ArrayList<String>(task.getFiles());
110
+ if (files.size() == 0) {
111
+ return null;
112
+ }
113
+ Collections.sort(files);
114
+ return Exec.newConfigDiff().
115
+ set("last_path", files.get(files.size() - 1));
116
+ }
117
+
118
+ @Override
119
+ public void cleanup(TaskSource taskSource,
120
+ int taskCount,
121
+ List<CommitReport> successCommitReports)
122
+ {
123
+ }
124
+
125
+ /**
126
+ * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
127
+ */
128
+ private static GoogleCredential getCredentialProvider (PluginTask task)
129
+ {
130
+ try {
131
+ // @see https://cloud.google.com/compute/docs/api/how-tos/authorization
132
+ // @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
133
+ GoogleCredential cred = new GoogleCredential.Builder().setTransport(httpTransport)
134
+ .setJsonFactory(jsonFactory)
135
+ .setServiceAccountId(task.getServiceAccountEmail())
136
+ .setServiceAccountScopes(
137
+ ImmutableList.of(
138
+ StorageScopes.DEVSTORAGE_READ_ONLY
139
+ )
140
+ )
141
+ .setServiceAccountPrivateKeyFromP12File(new File(task.getP12KeyfileFullpath()))
142
+ .build();
143
+ return cred;
144
+ } catch (IOException e) {
145
+ log.warn("Could not load client secrets file " + task.getP12KeyfileFullpath());
146
+ } catch (GeneralSecurityException e) {
147
+ log.warn ("Google Authentication was failed");
148
+ }
149
+ return null;
150
+ }
151
+
152
+ private static Storage newGcsClient(PluginTask task)
153
+ {
154
+ GoogleCredential credentials = getCredentialProvider(task);
155
+ Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
156
+ .setApplicationName(task.getApplicationName())
157
+ .build();
158
+
159
+ return client;
160
+ }
161
+
162
+ public List<String> listFiles(PluginTask task)
163
+ {
164
+ Storage client = newGcsClient(task);
165
+ String bucket = task.getBucket();
166
+
167
+ return listGcsFilesByPrefix(client, bucket, task.getPathPrefix(), task.getLastPath());
168
+ }
169
+
170
+ /**
171
+ * Lists GCS filenames filtered by prefix.
172
+ *
173
+ * The resulting list does not include the file that's size == 0.
174
+ */
175
+ public static List<String> listGcsFilesByPrefix(Storage client, String bucket,
176
+ String prefix, Optional<String> lastPath)
177
+ {
178
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
179
+
180
+ String lastKey = lastPath.orNull();
181
+
182
+ // @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
183
+ try {
184
+ Storage.Buckets.Get getBucket = client.buckets().get(bucket);
185
+ getBucket.setProjection("full");
186
+ Bucket bk = getBucket.execute();
187
+ if (log.isDebugEnabled()) {
188
+ log.debug("bucket name: " + bucket);
189
+ log.debug("bucket location: " + bk.getLocation());
190
+ log.debug("bucket timeCreated: " + bk.getTimeCreated());
191
+ log.debug("bucket owner: " + bk.getOwner());
192
+ }
193
+ } catch (Exception e) {
194
+ log.warn("Could not access to bucket:" + bucket);
195
+ log.warn(e.getMessage());
196
+ }
197
+
198
+
199
+ try {
200
+ // @see https://cloud.google.com/storage/docs/json_api/v1/objects/list
201
+ Storage.Objects.List listObjects = client.objects().list(bucket);
202
+ listObjects.setPrefix(prefix);
203
+ listObjects.setPageToken(lastKey);
204
+ do {
205
+ Objects objects = listObjects.execute();
206
+ List<StorageObject> items = objects.getItems();
207
+ if (items == null) {
208
+ log.info("No file was found in bucket:" + bucket + " prefix:" + prefix);
209
+ break;
210
+ }
211
+ for (StorageObject o : items) {
212
+ if (log.isDebugEnabled()) {
213
+ log.debug("filename: " + o.getName());
214
+ log.debug("updated: " + o.getUpdated());
215
+ }
216
+ if (o.getSize().compareTo(BigInteger.ZERO) > 0) {
217
+ builder.add(o.getName());
218
+ }
219
+ }
220
+ lastKey = objects.getNextPageToken();
221
+ listObjects.setPageToken(lastKey);
222
+ } while (lastKey != null);
223
+ } catch (Exception e) {
224
+ log.warn("Could not get file list from bucket:" + bucket);
225
+ log.warn(e.getMessage());
226
+ }
227
+
228
+ return builder.build();
229
+ }
230
+
231
+ @Override
232
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
233
+ {
234
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
235
+ return new GcsFileInput(task, taskIndex);
236
+ }
237
+
238
+ public static class GcsFileInput
239
+ extends InputStreamFileInput
240
+ implements TransactionalFileInput
241
+ {
242
+ private static class SingleFileProvider
243
+ implements InputStreamFileInput.Provider
244
+ {
245
+ private final Storage client;
246
+ private final String bucket;
247
+ private final String key;
248
+ private boolean opened = false;
249
+
250
+ public SingleFileProvider(PluginTask task, int taskIndex)
251
+ {
252
+ this.client = newGcsClient(task);
253
+ this.bucket = task.getBucket();
254
+ this.key = task.getFiles().get(taskIndex);
255
+ }
256
+
257
+ @Override
258
+ public InputStream openNext() throws IOException
259
+ {
260
+ if (opened) {
261
+ return null;
262
+ }
263
+ opened = true;
264
+ Storage.Objects.Get getObject = client.objects().get(bucket, key);
265
+
266
+ return getObject.executeMediaAsInputStream();
267
+ }
268
+
269
+ @Override
270
+ public void close() { }
271
+ }
272
+
273
+ public GcsFileInput(PluginTask task, int taskIndex)
274
+ {
275
+ super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
276
+ }
277
+
278
+ public void abort() { }
279
+
280
+ public CommitReport commit()
281
+ {
282
+ return Exec.newCommitReport();
283
+ }
284
+
285
+ @Override
286
+ public void close() { }
287
+ }
288
+ }
@@ -0,0 +1,9 @@
1
+ package org.embulk.input.gcs;
2
+
3
+ import static org.junit.Assert.*;
4
+ import java.util.List;
5
+ import org.junit.Test;
6
+
7
+ // @todo Write unit test.
8
+ public class TestGcsFileInputPlugin {
9
+ }
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-gcs
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Satoshi Akama
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: gcs input plugin is an Embulk plugin that loads records from Google Cloud Storage. read by any input plugins. Search the output plugins by 'embulk-output' keyword.
42
+ email:
43
+ - satoshiakama@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - README.md
50
+ - build.gradle
51
+ - gradle/wrapper/gradle-wrapper.jar
52
+ - gradle/wrapper/gradle-wrapper.properties
53
+ - gradlew
54
+ - gradlew.bat
55
+ - lib/embulk/input/gcs.rb
56
+ - settings.gradle
57
+ - src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
58
+ - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
59
+ - classpath/commons-codec-1.3.jar
60
+ - classpath/commons-logging-1.1.1.jar
61
+ - classpath/embulk-input-gcs-0.1.0.jar
62
+ - classpath/google-api-client-1.19.1.jar
63
+ - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
64
+ - classpath/google-http-client-1.19.0.jar
65
+ - classpath/google-http-client-jackson2-1.19.0.jar
66
+ - classpath/google-oauth-client-1.19.0.jar
67
+ - classpath/httpclient-4.0.1.jar
68
+ - classpath/httpcore-4.0.1.jar
69
+ - classpath/jsr305-1.3.9.jar
70
+ homepage: https://github.com/sakama/embulk-input-gcs
71
+ licenses:
72
+ - Apache-2.0
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.1.9
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Google Cloud Storage input plugin for Embulk
94
+ test_files: []