embulk-input-gcs 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -4
- data/build.gradle +2 -2
- data/classpath/embulk-input-gcs-0.1.6.jar +0 -0
- data/src/main/java/org/embulk/input/gcs/GcsAuthentication.java +97 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +33 -51
- metadata +5 -4
- data/classpath/embulk-input-gcs-0.1.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1337868e0a547834610ae572d49228a3394aff7
|
4
|
+
data.tar.gz: 52e83dd8d9b91eb7ca715ef36b3d74f8241e2860
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8980edca632680de44f5311b3e2a3c622d94e3ee15f76eade05a536eb96cf64397e8916b4b47516a0724b9a2d2b03666e78ff2bc9bdee60352a6714e3d3383e
|
7
|
+
data.tar.gz: 8ad0624eb9576e37dbeac7dc2eead16bc72bfae8f51274d81042f0657821024ea2595ccd71f637fa9c63af049dac6bdd61fc9fe5df24ec8d1b36ee3bfff85a56
|
data/README.md
CHANGED
@@ -15,13 +15,16 @@ embulk gem install embulk-input-gcs
|
|
15
15
|
```
|
16
16
|
|
17
17
|
### Google Service Account Settings
|
18
|
+
|
19
|
+
If you choose "private_key" as the [auth_method](#Authentication), you can obtain the service_account_email and the private key as described below.
|
20
|
+
|
18
21
|
1. Make project at [Google Developers Console](https://console.developers.google.com/project).
|
19
22
|
|
20
23
|
1. Make "Service Account" with [this step](https://cloud.google.com/storage/docs/authentication#service_accounts).
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
|
25
|
+
A Service Account has two specific scopes: read-only and read-write.
|
26
|
+
|
27
|
+
embulk-input-gcs can run with the "read-only" scope.
|
25
28
|
|
26
29
|
1. Generate private key in P12(PKCS12) format, and upload to machine.
|
27
30
|
|
@@ -37,6 +40,7 @@ embulk run /path/to/config.yml
|
|
37
40
|
|
38
41
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
39
42
|
- **path_prefix** prefix of target keys (string, required)
|
43
|
+
- **auth_method** (string, optional, "private_key" or "compute_engine". default value is "private_key")
|
40
44
|
- **service_account_email** Google Cloud Storage service_account_email (string, required)
|
41
45
|
- **p12_keyfile_fullpath** fullpath of p12 key (string, required)
|
42
46
|
- **application_name** application name anything you like (string, optional)
|
@@ -48,6 +52,7 @@ in:
|
|
48
52
|
type: gcs
|
49
53
|
bucket: my-gcs-bucket
|
50
54
|
path_prefix: logs/csv-
|
55
|
+
auth_method: private_key #default
|
51
56
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
52
57
|
p12_keyfile_fullpath: /path/to/p12_keyfile.p12
|
53
58
|
application_name: Anything you like
|
@@ -60,6 +65,7 @@ in:
|
|
60
65
|
type: gcs
|
61
66
|
bucket: my-gcs-bucket
|
62
67
|
path_prefix: sample_
|
68
|
+
auth_method: private_key #default
|
63
69
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
64
70
|
p12_keyfile_fullpath: /path/to/p12_keyfile.p12
|
65
71
|
application_name: Anything you like
|
@@ -81,6 +87,29 @@ in:
|
|
81
87
|
out: {type: stdout}
|
82
88
|
```
|
83
89
|
|
90
|
+
## Authentication
|
91
|
+
|
92
|
+
Two methods are supported for fetching an access token for the service account.
|
93
|
+
|
94
|
+
1. Public-Private key pair
|
95
|
+
2. Pre-defined access token (Compute Engine only)
|
96
|
+
|
97
|
+
The examples above use the first one. You first need to create a service account (client ID),
|
98
|
+
download its private key and deploy the key with embulk.
|
99
|
+
|
100
|
+
On the other hand, you don't need to explicitly create a service account for embulk when you
|
101
|
+
run embulk in Google Compute Engine. In this second authentication method, you need to
|
102
|
+
add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
|
103
|
+
Compute Engine instance, then you can configure embulk like this.
|
104
|
+
|
105
|
+
[Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
|
106
|
+
|
107
|
+
```yaml
|
108
|
+
input:
|
109
|
+
type: gcs
|
110
|
+
auth_method: compute_engine
|
111
|
+
```
|
112
|
+
|
84
113
|
## Build
|
85
114
|
|
86
115
|
```
|
data/build.gradle
CHANGED
@@ -15,7 +15,7 @@ configurations {
|
|
15
15
|
sourceCompatibility = 1.7
|
16
16
|
targetCompatibility = 1.7
|
17
17
|
|
18
|
-
version = "0.1.5"
|
18
|
+
version = "0.1.6"
|
19
19
|
|
20
20
|
dependencies {
|
21
21
|
compile "org.embulk:embulk-core:0.7.1"
|
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
|
|
46
46
|
spec.version = "${project.version}"
|
47
47
|
spec.authors = ["Satoshi Akama"]
|
48
48
|
spec.summary = %[Google Cloud Storage input plugin for Embulk]
|
49
|
-
spec.description = %[Reads files stored on Google Cloud Storage (Standard
|
49
|
+
spec.description = %[Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)]
|
50
50
|
spec.email = ["satoshiakama@gmail.com"]
|
51
51
|
spec.licenses = ["Apache-2.0"]
|
52
52
|
spec.homepage = "https://github.com/embulk/embulk-input-gcs"
|
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.IOException;
|
5
|
+
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.collect.ImmutableList;
|
8
|
+
import java.security.GeneralSecurityException;
|
9
|
+
|
10
|
+
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
11
|
+
import com.google.api.client.googleapis.compute.ComputeCredential;
|
12
|
+
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
13
|
+
import com.google.api.client.http.HttpTransport;
|
14
|
+
import com.google.api.client.json.JsonFactory;
|
15
|
+
import com.google.api.client.json.jackson2.JacksonFactory;
|
16
|
+
import com.google.api.client.http.HttpRequestInitializer;
|
17
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
18
|
+
import com.google.api.services.storage.Storage;
|
19
|
+
import com.google.api.services.storage.StorageScopes;
|
20
|
+
import com.google.api.services.storage.model.Objects;
|
21
|
+
import org.embulk.spi.Exec;
|
22
|
+
import org.slf4j.Logger;
|
23
|
+
|
24
|
+
public class GcsAuthentication
|
25
|
+
{
|
26
|
+
private final Logger log = Exec.getLogger(GcsAuthentication.class);
|
27
|
+
private final Optional<String> serviceAccountEmail;
|
28
|
+
private final Optional<String> p12KeyFilePath;
|
29
|
+
private final String applicationName;
|
30
|
+
private final HttpTransport httpTransport;
|
31
|
+
private final JsonFactory jsonFactory;
|
32
|
+
private final HttpRequestInitializer credentials;
|
33
|
+
|
34
|
+
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
|
35
|
+
throws IOException, GeneralSecurityException
|
36
|
+
{
|
37
|
+
this.serviceAccountEmail = serviceAccountEmail;
|
38
|
+
this.p12KeyFilePath = p12KeyFilePath;
|
39
|
+
this.applicationName = applicationName;
|
40
|
+
|
41
|
+
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
42
|
+
this.jsonFactory = new JacksonFactory();
|
43
|
+
|
44
|
+
if (authMethod.equals("compute_engine")) {
|
45
|
+
this.credentials = getComputeCredential();
|
46
|
+
} else {
|
47
|
+
this.credentials = getServiceAccountCredential();
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
/**
|
52
|
+
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
|
53
|
+
*/
|
54
|
+
private GoogleCredential getServiceAccountCredential() throws IOException, GeneralSecurityException
|
55
|
+
{
|
56
|
+
// @see https://cloud.google.com/compute/docs/api/how-tos/authorization
|
57
|
+
// @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
|
58
|
+
// @see https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/BigqueryScopes.html
|
59
|
+
return new GoogleCredential.Builder()
|
60
|
+
.setTransport(httpTransport)
|
61
|
+
.setJsonFactory(jsonFactory)
|
62
|
+
.setServiceAccountId(serviceAccountEmail.orNull())
|
63
|
+
.setServiceAccountScopes(
|
64
|
+
ImmutableList.of(
|
65
|
+
StorageScopes.DEVSTORAGE_READ_ONLY
|
66
|
+
)
|
67
|
+
)
|
68
|
+
.setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
|
69
|
+
.build();
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
|
73
|
+
* @see http://developers.guge.io/accounts/docs/OAuth2ServiceAccount#creatinganaccount
|
74
|
+
* @see https://developers.google.com/accounts/docs/OAuth2
|
75
|
+
*/
|
76
|
+
private ComputeCredential getComputeCredential() throws IOException
|
77
|
+
{
|
78
|
+
ComputeCredential credential = new ComputeCredential.Builder(httpTransport, jsonFactory)
|
79
|
+
.build();
|
80
|
+
credential.refreshToken();
|
81
|
+
|
82
|
+
return credential;
|
83
|
+
}
|
84
|
+
|
85
|
+
public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
|
86
|
+
{
|
87
|
+
Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
|
88
|
+
.setApplicationName(applicationName)
|
89
|
+
.build();
|
90
|
+
|
91
|
+
// For throw IOException when authentication is fail.
|
92
|
+
long maxResults = 1;
|
93
|
+
Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
|
94
|
+
|
95
|
+
return client;
|
96
|
+
}
|
97
|
+
}
|
@@ -1,10 +1,8 @@
|
|
1
1
|
package org.embulk.input.gcs;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
-
import java.util.Arrays;
|
5
4
|
import java.util.ArrayList;
|
6
5
|
import java.util.Collections;
|
7
|
-
import java.io.File;
|
8
6
|
import java.io.IOException;
|
9
7
|
import java.io.InputStream;
|
10
8
|
import java.math.BigInteger;
|
@@ -17,10 +15,10 @@ import org.embulk.config.Config;
|
|
17
15
|
import org.embulk.config.ConfigInject;
|
18
16
|
import org.embulk.config.ConfigDiff;
|
19
17
|
import org.embulk.config.ConfigDefault;
|
18
|
+
import org.embulk.config.ConfigException;
|
20
19
|
import org.embulk.config.ConfigSource;
|
21
20
|
import org.embulk.config.Task;
|
22
21
|
import org.embulk.config.TaskSource;
|
23
|
-
import org.embulk.config.TaskReport;
|
24
22
|
import org.embulk.spi.Exec;
|
25
23
|
import org.embulk.spi.BufferAllocator;
|
26
24
|
import org.embulk.spi.FileInputPlugin;
|
@@ -29,13 +27,7 @@ import org.embulk.spi.util.InputStreamFileInput;
|
|
29
27
|
|
30
28
|
import org.slf4j.Logger;
|
31
29
|
|
32
|
-
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
33
|
-
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
34
|
-
import com.google.api.client.http.HttpTransport;
|
35
|
-
import com.google.api.client.json.JsonFactory;
|
36
|
-
import com.google.api.client.json.jackson2.JacksonFactory;
|
37
30
|
import com.google.api.services.storage.Storage;
|
38
|
-
import com.google.api.services.storage.StorageScopes;
|
39
31
|
import com.google.api.services.storage.model.Bucket;
|
40
32
|
import com.google.api.services.storage.model.Objects;
|
41
33
|
import com.google.api.services.storage.model.StorageObject;
|
@@ -56,15 +48,21 @@ public class GcsFileInputPlugin
|
|
56
48
|
@ConfigDefault("null")
|
57
49
|
Optional<String> getLastPath();
|
58
50
|
|
51
|
+
@Config("auth_method")
|
52
|
+
@ConfigDefault("\"private_key\"")
|
53
|
+
AuthMethod getAuthMethod();
|
54
|
+
|
59
55
|
@Config("service_account_email")
|
60
|
-
|
56
|
+
@ConfigDefault("null")
|
57
|
+
Optional<String> getServiceAccountEmail();
|
61
58
|
|
62
59
|
@Config("application_name")
|
63
60
|
@ConfigDefault("\"Embulk GCS input plugin\"")
|
64
61
|
String getApplicationName();
|
65
62
|
|
66
63
|
@Config("p12_keyfile_fullpath")
|
67
|
-
|
64
|
+
@ConfigDefault("null")
|
65
|
+
Optional<String> getP12KeyfileFullpath();
|
68
66
|
|
69
67
|
List<String> getFiles();
|
70
68
|
void setFiles(List<String> files);
|
@@ -74,8 +72,6 @@ public class GcsFileInputPlugin
|
|
74
72
|
}
|
75
73
|
|
76
74
|
private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
|
77
|
-
private static HttpTransport httpTransport;
|
78
|
-
private static JsonFactory jsonFactory;
|
79
75
|
|
80
76
|
@Override
|
81
77
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -83,13 +79,6 @@ public class GcsFileInputPlugin
|
|
83
79
|
{
|
84
80
|
PluginTask task = config.loadConfig(PluginTask.class);
|
85
81
|
|
86
|
-
try {
|
87
|
-
httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
88
|
-
jsonFactory = new JacksonFactory();
|
89
|
-
} catch (Exception e) {
|
90
|
-
log.warn("Could not generate http transport");
|
91
|
-
}
|
92
|
-
|
93
82
|
// list files recursively
|
94
83
|
task.setFiles(listFiles(task));
|
95
84
|
// number of processors is same with number of files
|
@@ -128,39 +117,14 @@ public class GcsFileInputPlugin
|
|
128
117
|
{
|
129
118
|
}
|
130
119
|
|
131
|
-
|
132
|
-
|
133
|
-
*/
|
134
|
-
private static GoogleCredential getCredentialProvider (PluginTask task)
|
135
|
-
{
|
120
|
+
private static Storage newGcsClient(final PluginTask task) {
|
121
|
+
Storage client = null;
|
136
122
|
try {
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
.setServiceAccountId(task.getServiceAccountEmail())
|
142
|
-
.setServiceAccountScopes(
|
143
|
-
ImmutableList.of(
|
144
|
-
StorageScopes.DEVSTORAGE_READ_ONLY
|
145
|
-
)
|
146
|
-
)
|
147
|
-
.setServiceAccountPrivateKeyFromP12File(new File(task.getP12KeyfileFullpath()))
|
148
|
-
.build();
|
149
|
-
return cred;
|
150
|
-
} catch (IOException e) {
|
151
|
-
log.warn(String.format("Could not load client secrets file %s", task.getP12KeyfileFullpath()));
|
152
|
-
} catch (GeneralSecurityException e) {
|
153
|
-
log.warn ("Google Authentication was failed");
|
123
|
+
GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
|
124
|
+
client = auth.getGcsClient(task.getBucket());
|
125
|
+
} catch (GeneralSecurityException | IOException ex) {
|
126
|
+
throw new ConfigException(ex);
|
154
127
|
}
|
155
|
-
return null;
|
156
|
-
}
|
157
|
-
|
158
|
-
private static Storage newGcsClient(PluginTask task)
|
159
|
-
{
|
160
|
-
GoogleCredential credentials = getCredentialProvider(task);
|
161
|
-
Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
|
162
|
-
.setApplicationName(task.getApplicationName())
|
163
|
-
.build();
|
164
128
|
|
165
129
|
return client;
|
166
130
|
}
|
@@ -291,4 +255,22 @@ public class GcsFileInputPlugin
|
|
291
255
|
@Override
|
292
256
|
public void close() { }
|
293
257
|
}
|
258
|
+
|
259
|
+
public enum AuthMethod
|
260
|
+
{
|
261
|
+
private_key("private_key"),
|
262
|
+
compute_engine("compute_engine");
|
263
|
+
|
264
|
+
private final String string;
|
265
|
+
|
266
|
+
AuthMethod(String string)
|
267
|
+
{
|
268
|
+
this.string = string;
|
269
|
+
}
|
270
|
+
|
271
|
+
public String getString()
|
272
|
+
{
|
273
|
+
return string;
|
274
|
+
}
|
275
|
+
}
|
294
276
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Reads files stored on Google Cloud Storage (Standard
|
41
|
+
description: Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)
|
42
42
|
email:
|
43
43
|
- satoshiakama@gmail.com
|
44
44
|
executables: []
|
@@ -54,11 +54,12 @@ files:
|
|
54
54
|
- gradlew.bat
|
55
55
|
- lib/embulk/input/gcs.rb
|
56
56
|
- settings.gradle
|
57
|
+
- src/main/java/org/embulk/input/gcs/GcsAuthentication.java
|
57
58
|
- src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
|
58
59
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
59
60
|
- classpath/commons-codec-1.3.jar
|
60
61
|
- classpath/commons-logging-1.1.1.jar
|
61
|
-
- classpath/embulk-input-gcs-0.1.5.jar
|
62
|
+
- classpath/embulk-input-gcs-0.1.6.jar
|
62
63
|
- classpath/google-api-client-1.19.1.jar
|
63
64
|
- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
|
64
65
|
- classpath/google-http-client-1.19.0.jar
|
Binary file
|