embulk-input-gcs 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +33 -4
- data/build.gradle +2 -2
- data/classpath/embulk-input-gcs-0.1.6.jar +0 -0
- data/src/main/java/org/embulk/input/gcs/GcsAuthentication.java +97 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +33 -51
- metadata +5 -4
- data/classpath/embulk-input-gcs-0.1.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1337868e0a547834610ae572d49228a3394aff7
|
4
|
+
data.tar.gz: 52e83dd8d9b91eb7ca715ef36b3d74f8241e2860
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8980edca632680de44f5311b3e2a3c622d94e3ee15f76eade05a536eb96cf64397e8916b4b47516a0724b9a2d2b03666e78ff2bc9bdee60352a6714e3d3383e
|
7
|
+
data.tar.gz: 8ad0624eb9576e37dbeac7dc2eead16bc72bfae8f51274d81042f0657821024ea2595ccd71f637fa9c63af049dac6bdd61fc9fe5df24ec8d1b36ee3bfff85a56
|
data/README.md
CHANGED
@@ -15,13 +15,16 @@ embulk gem install embulk-input-gcs
|
|
15
15
|
```
|
16
16
|
|
17
17
|
### Google Service Account Settings
|
18
|
+
|
19
|
+
If you choose "private_key" as the [auth_method](#authentication), you can obtain the service_account_email and the P12 private key as described below.
|
20
|
+
|
18
21
|
1. Make project at [Google Developers Console](https://console.developers.google.com/project).
|
19
22
|
|
20
23
|
1. Make "Service Account" with [this step](https://cloud.google.com/storage/docs/authentication#service_accounts).
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
|
25
|
+
A Service Account has two specific scopes: read-only and read-write.
|
26
|
+
|
27
|
+
embulk-input-gcs only needs the "read-only" scope.
|
25
28
|
|
26
29
|
1. Generate private key in P12(PKCS12) format, and upload to machine.
|
27
30
|
|
@@ -37,6 +40,7 @@ embulk run /path/to/config.yml
|
|
37
40
|
|
38
41
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
39
42
|
- **path_prefix** prefix of target keys (string, required)
|
43
|
+
- **auth_method** method used to authenticate to Google Cloud Storage (string, optional, "private_key" or "compute_engine". default value is "private_key")
|
40
44
|
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is "private_key")
|
41
45
|
- **p12_keyfile_fullpath** fullpath of p12 key (string, required when auth_method is "private_key")
|
42
46
|
- **application_name** application name anything you like (string, optional)
|
@@ -48,6 +52,7 @@ in:
|
|
48
52
|
type: gcs
|
49
53
|
bucket: my-gcs-bucket
|
50
54
|
path_prefix: logs/csv-
|
55
|
+
auth_method: private_key #default
|
51
56
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
52
57
|
p12_keyfile_fullpath: /path/to/p12_keyfile.p12
|
53
58
|
application_name: Anything you like
|
@@ -60,6 +65,7 @@ in:
|
|
60
65
|
type: gcs
|
61
66
|
bucket: my-gcs-bucket
|
62
67
|
path_prefix: sample_
|
68
|
+
auth_method: private_key #default
|
63
69
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
64
70
|
p12_keyfile_fullpath: /path/to/p12_keyfile.p12
|
65
71
|
application_name: Anything you like
|
@@ -81,6 +87,29 @@ in:
|
|
81
87
|
out: {type: stdout}
|
82
88
|
```
|
83
89
|
|
90
|
+
## Authentication
|
91
|
+
|
92
|
+
Two methods are supported for fetching an access token for the service account.
|
93
|
+
|
94
|
+
1. Public-Private key pair
|
95
|
+
2. Pre-defined access token (Compute Engine only)
|
96
|
+
|
97
|
+
The examples above use the first one. You first need to create a service account (client ID),
|
98
|
+
download its private key and deploy the key with embulk.
|
99
|
+
|
100
|
+
On the other hand, you don't need to explicitly create a service account for embulk when you
|
101
|
+
run embulk in Google Compute Engine. In this second authentication method, you need to
|
102
|
+
add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
|
103
|
+
Compute Engine instance, then you can configure embulk like this.
|
104
|
+
|
105
|
+
[Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
|
106
|
+
|
107
|
+
```yaml
|
108
|
+
in:
|
109
|
+
type: gcs
|
110
|
+
auth_method: compute_engine
|
111
|
+
```
|
112
|
+
|
84
113
|
## Build
|
85
114
|
|
86
115
|
```
|
data/build.gradle
CHANGED
@@ -15,7 +15,7 @@ configurations {
|
|
15
15
|
sourceCompatibility = 1.7
|
16
16
|
targetCompatibility = 1.7
|
17
17
|
|
18
|
-
version = "0.1.5"
|
18
|
+
version = "0.1.6"
|
19
19
|
|
20
20
|
dependencies {
|
21
21
|
compile "org.embulk:embulk-core:0.7.1"
|
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
|
|
46
46
|
spec.version = "${project.version}"
|
47
47
|
spec.authors = ["Satoshi Akama"]
|
48
48
|
spec.summary = %[Google Cloud Storage input plugin for Embulk]
|
49
|
-
spec.description = %[Reads files stored on Google Cloud Storage (Standard
|
49
|
+
spec.description = %[Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)]
|
50
50
|
spec.email = ["satoshiakama@gmail.com"]
|
51
51
|
spec.licenses = ["Apache-2.0"]
|
52
52
|
spec.homepage = "https://github.com/embulk/embulk-input-gcs"
|
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.IOException;
|
5
|
+
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.collect.ImmutableList;
|
8
|
+
import java.security.GeneralSecurityException;
|
9
|
+
|
10
|
+
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
11
|
+
import com.google.api.client.googleapis.compute.ComputeCredential;
|
12
|
+
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
13
|
+
import com.google.api.client.http.HttpTransport;
|
14
|
+
import com.google.api.client.json.JsonFactory;
|
15
|
+
import com.google.api.client.json.jackson2.JacksonFactory;
|
16
|
+
import com.google.api.client.http.HttpRequestInitializer;
|
17
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
18
|
+
import com.google.api.services.storage.Storage;
|
19
|
+
import com.google.api.services.storage.StorageScopes;
|
20
|
+
import com.google.api.services.storage.model.Objects;
|
21
|
+
import org.embulk.spi.Exec;
|
22
|
+
import org.slf4j.Logger;
|
23
|
+
|
24
|
+
public class GcsAuthentication
|
25
|
+
{
|
26
|
+
private final Logger log = Exec.getLogger(GcsAuthentication.class);
|
27
|
+
private final Optional<String> serviceAccountEmail;
|
28
|
+
private final Optional<String> p12KeyFilePath;
|
29
|
+
private final String applicationName;
|
30
|
+
private final HttpTransport httpTransport;
|
31
|
+
private final JsonFactory jsonFactory;
|
32
|
+
private final HttpRequestInitializer credentials;
|
33
|
+
|
34
|
+
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
|
35
|
+
throws IOException, GeneralSecurityException
|
36
|
+
{
|
37
|
+
this.serviceAccountEmail = serviceAccountEmail;
|
38
|
+
this.p12KeyFilePath = p12KeyFilePath;
|
39
|
+
this.applicationName = applicationName;
|
40
|
+
|
41
|
+
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
42
|
+
this.jsonFactory = new JacksonFactory();
|
43
|
+
|
44
|
+
if (authMethod.equals("compute_engine")) {
|
45
|
+
this.credentials = getComputeCredential();
|
46
|
+
} else {
|
47
|
+
this.credentials = getServiceAccountCredential();
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
/**
|
52
|
+
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
|
53
|
+
*/
|
54
|
+
private GoogleCredential getServiceAccountCredential() throws IOException, GeneralSecurityException
|
55
|
+
{
|
56
|
+
// @see https://cloud.google.com/compute/docs/api/how-tos/authorization
|
57
|
+
// @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
|
58
|
+
// @see https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/BigqueryScopes.html
|
59
|
+
return new GoogleCredential.Builder()
|
60
|
+
.setTransport(httpTransport)
|
61
|
+
.setJsonFactory(jsonFactory)
|
62
|
+
.setServiceAccountId(serviceAccountEmail.orNull())
|
63
|
+
.setServiceAccountScopes(
|
64
|
+
ImmutableList.of(
|
65
|
+
StorageScopes.DEVSTORAGE_READ_ONLY
|
66
|
+
)
|
67
|
+
)
|
68
|
+
.setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
|
69
|
+
.build();
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
|
73
|
+
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#creatinganaccount
|
74
|
+
* @see https://developers.google.com/accounts/docs/OAuth2
|
75
|
+
*/
|
76
|
+
private ComputeCredential getComputeCredential() throws IOException
|
77
|
+
{
|
78
|
+
ComputeCredential credential = new ComputeCredential.Builder(httpTransport, jsonFactory)
|
79
|
+
.build();
|
80
|
+
credential.refreshToken();
|
81
|
+
|
82
|
+
return credential;
|
83
|
+
}
|
84
|
+
|
85
|
+
public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
|
86
|
+
{
|
87
|
+
Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
|
88
|
+
.setApplicationName(applicationName)
|
89
|
+
.build();
|
90
|
+
|
91
|
+
// Issue a minimal list request so that an IOException is thrown here if authentication fails.
|
92
|
+
long maxResults = 1;
|
93
|
+
Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
|
94
|
+
|
95
|
+
return client;
|
96
|
+
}
|
97
|
+
}
|
@@ -1,10 +1,8 @@
|
|
1
1
|
package org.embulk.input.gcs;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
-
import java.util.Arrays;
|
5
4
|
import java.util.ArrayList;
|
6
5
|
import java.util.Collections;
|
7
|
-
import java.io.File;
|
8
6
|
import java.io.IOException;
|
9
7
|
import java.io.InputStream;
|
10
8
|
import java.math.BigInteger;
|
@@ -17,10 +15,10 @@ import org.embulk.config.Config;
|
|
17
15
|
import org.embulk.config.ConfigInject;
|
18
16
|
import org.embulk.config.ConfigDiff;
|
19
17
|
import org.embulk.config.ConfigDefault;
|
18
|
+
import org.embulk.config.ConfigException;
|
20
19
|
import org.embulk.config.ConfigSource;
|
21
20
|
import org.embulk.config.Task;
|
22
21
|
import org.embulk.config.TaskSource;
|
23
|
-
import org.embulk.config.TaskReport;
|
24
22
|
import org.embulk.spi.Exec;
|
25
23
|
import org.embulk.spi.BufferAllocator;
|
26
24
|
import org.embulk.spi.FileInputPlugin;
|
@@ -29,13 +27,7 @@ import org.embulk.spi.util.InputStreamFileInput;
|
|
29
27
|
|
30
28
|
import org.slf4j.Logger;
|
31
29
|
|
32
|
-
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
33
|
-
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
34
|
-
import com.google.api.client.http.HttpTransport;
|
35
|
-
import com.google.api.client.json.JsonFactory;
|
36
|
-
import com.google.api.client.json.jackson2.JacksonFactory;
|
37
30
|
import com.google.api.services.storage.Storage;
|
38
|
-
import com.google.api.services.storage.StorageScopes;
|
39
31
|
import com.google.api.services.storage.model.Bucket;
|
40
32
|
import com.google.api.services.storage.model.Objects;
|
41
33
|
import com.google.api.services.storage.model.StorageObject;
|
@@ -56,15 +48,21 @@ public class GcsFileInputPlugin
|
|
56
48
|
@ConfigDefault("null")
|
57
49
|
Optional<String> getLastPath();
|
58
50
|
|
51
|
+
@Config("auth_method")
|
52
|
+
@ConfigDefault("\"private_key\"")
|
53
|
+
AuthMethod getAuthMethod();
|
54
|
+
|
59
55
|
@Config("service_account_email")
|
60
|
-
|
56
|
+
@ConfigDefault("null")
|
57
|
+
Optional<String> getServiceAccountEmail();
|
61
58
|
|
62
59
|
@Config("application_name")
|
63
60
|
@ConfigDefault("\"Embulk GCS input plugin\"")
|
64
61
|
String getApplicationName();
|
65
62
|
|
66
63
|
@Config("p12_keyfile_fullpath")
|
67
|
-
|
64
|
+
@ConfigDefault("null")
|
65
|
+
Optional<String> getP12KeyfileFullpath();
|
68
66
|
|
69
67
|
List<String> getFiles();
|
70
68
|
void setFiles(List<String> files);
|
@@ -74,8 +72,6 @@ public class GcsFileInputPlugin
|
|
74
72
|
}
|
75
73
|
|
76
74
|
private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
|
77
|
-
private static HttpTransport httpTransport;
|
78
|
-
private static JsonFactory jsonFactory;
|
79
75
|
|
80
76
|
@Override
|
81
77
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -83,13 +79,6 @@ public class GcsFileInputPlugin
|
|
83
79
|
{
|
84
80
|
PluginTask task = config.loadConfig(PluginTask.class);
|
85
81
|
|
86
|
-
try {
|
87
|
-
httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
88
|
-
jsonFactory = new JacksonFactory();
|
89
|
-
} catch (Exception e) {
|
90
|
-
log.warn("Could not generate http transport");
|
91
|
-
}
|
92
|
-
|
93
82
|
// list files recursively
|
94
83
|
task.setFiles(listFiles(task));
|
95
84
|
// number of processors is same with number of files
|
@@ -128,39 +117,14 @@ public class GcsFileInputPlugin
|
|
128
117
|
{
|
129
118
|
}
|
130
119
|
|
131
|
-
|
132
|
-
|
133
|
-
*/
|
134
|
-
private static GoogleCredential getCredentialProvider (PluginTask task)
|
135
|
-
{
|
120
|
+
private static Storage newGcsClient(final PluginTask task) {
|
121
|
+
Storage client = null;
|
136
122
|
try {
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
.setServiceAccountId(task.getServiceAccountEmail())
|
142
|
-
.setServiceAccountScopes(
|
143
|
-
ImmutableList.of(
|
144
|
-
StorageScopes.DEVSTORAGE_READ_ONLY
|
145
|
-
)
|
146
|
-
)
|
147
|
-
.setServiceAccountPrivateKeyFromP12File(new File(task.getP12KeyfileFullpath()))
|
148
|
-
.build();
|
149
|
-
return cred;
|
150
|
-
} catch (IOException e) {
|
151
|
-
log.warn(String.format("Could not load client secrets file %s", task.getP12KeyfileFullpath()));
|
152
|
-
} catch (GeneralSecurityException e) {
|
153
|
-
log.warn ("Google Authentication was failed");
|
123
|
+
GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
|
124
|
+
client = auth.getGcsClient(task.getBucket());
|
125
|
+
} catch (GeneralSecurityException | IOException ex) {
|
126
|
+
throw new ConfigException(ex);
|
154
127
|
}
|
155
|
-
return null;
|
156
|
-
}
|
157
|
-
|
158
|
-
private static Storage newGcsClient(PluginTask task)
|
159
|
-
{
|
160
|
-
GoogleCredential credentials = getCredentialProvider(task);
|
161
|
-
Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
|
162
|
-
.setApplicationName(task.getApplicationName())
|
163
|
-
.build();
|
164
128
|
|
165
129
|
return client;
|
166
130
|
}
|
@@ -291,4 +255,22 @@ public class GcsFileInputPlugin
|
|
291
255
|
@Override
|
292
256
|
public void close() { }
|
293
257
|
}
|
258
|
+
|
259
|
+
public enum AuthMethod
|
260
|
+
{
|
261
|
+
private_key("private_key"),
|
262
|
+
compute_engine("compute_engine");
|
263
|
+
|
264
|
+
private final String string;
|
265
|
+
|
266
|
+
AuthMethod(String string)
|
267
|
+
{
|
268
|
+
this.string = string;
|
269
|
+
}
|
270
|
+
|
271
|
+
public String getString()
|
272
|
+
{
|
273
|
+
return string;
|
274
|
+
}
|
275
|
+
}
|
294
276
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Reads files stored on Google Cloud Storage (Standard
|
41
|
+
description: Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)
|
42
42
|
email:
|
43
43
|
- satoshiakama@gmail.com
|
44
44
|
executables: []
|
@@ -54,11 +54,12 @@ files:
|
|
54
54
|
- gradlew.bat
|
55
55
|
- lib/embulk/input/gcs.rb
|
56
56
|
- settings.gradle
|
57
|
+
- src/main/java/org/embulk/input/gcs/GcsAuthentication.java
|
57
58
|
- src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
|
58
59
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
59
60
|
- classpath/commons-codec-1.3.jar
|
60
61
|
- classpath/commons-logging-1.1.1.jar
|
61
|
-
- classpath/embulk-input-gcs-0.1.5.jar
|
62
|
+
- classpath/embulk-input-gcs-0.1.6.jar
|
62
63
|
- classpath/google-api-client-1.19.1.jar
|
63
64
|
- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
|
64
65
|
- classpath/google-http-client-1.19.0.jar
|
Binary file
|