embulk-input-gcs 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +22 -0
- data/README.md +55 -18
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.1.7.jar +0 -0
- data/src/main/java/org/embulk/input/gcs/GcsAuthentication.java +16 -1
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +64 -4
- metadata +4 -3
- data/classpath/embulk-input-gcs-0.1.6.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24b44f4db77ef29422e03052b58cdf67378d5a79
|
4
|
+
data.tar.gz: 031a0686510df86a6b87780aa0da707cd7cd64cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb5815752627aeef5c32de944d23e78c03313970e574f61e3706808bb4314643311e4d581ae2fc7589ede24e80ff53236cdd0defecbd647a2d382d26b3630dcc
|
7
|
+
data.tar.gz: 5ce214a4e252f78bbc9a9215d3edcc0de671220e2f5f9697687e4b4ac0d9d05303a5a724f399ad1a8175a8a8d85bd50d977fc01af8a718e74cb50981e34b3b6d
|
data/ChangeLog
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Release 0.1.7 - 2015-10-06
|
2
|
+
|
3
|
+
* Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account
|
4
|
+
* Supported mapreduce-executor
|
5
|
+
|
6
|
+
Release 0.1.6 - 2015-09-05
|
7
|
+
|
8
|
+
* Added new auth method - pre-defined access token of GCE(Google Compute Engine)
|
9
|
+
|
10
|
+
Release 0.1.5 - 2015-08-19
|
11
|
+
|
12
|
+
* Upgraded embulk version to 0.7.0
|
13
|
+
* Refactored
|
14
|
+
|
15
|
+
Release 0.1.4 - 2015-06-27
|
16
|
+
|
17
|
+
* Keep the previous last_path when there are no input files. @frsyuki thanks!
|
18
|
+
* Refactored error handling logics.
|
19
|
+
|
20
|
+
Release 0.1.3 - 2015-03-16
|
21
|
+
|
22
|
+
* Changed supported Java version from 8 to 7
|
data/README.md
CHANGED
@@ -16,7 +16,7 @@ embulk gem install embulk-input-gcs
|
|
16
16
|
|
17
17
|
### Google Service Account Settings
|
18
18
|
|
19
|
-
If you chose "private_key" as [auth_method](#Authentication), you can get service_account_email and private_key like below.
|
19
|
+
If you choose "private_key" or "json_key" as [auth_method](#Authentication), you can obtain the service_account_email and the P12 private key, or the JSON key, as described below.
|
20
20
|
|
21
21
|
1. Make project at [Google Developers Console](https://console.developers.google.com/project).
|
22
22
|
|
@@ -26,9 +26,7 @@ If you chose "private_key" as [auth_method](#Authentication), you can get servic
|
|
26
26
|
|
27
27
|
embulk-input-gcs can run "read-only" scopes.
|
28
28
|
|
29
|
-
1. Generate private key in P12(PKCS12) format, and upload to machine.
|
30
|
-
|
31
|
-
1. Write "EMAIL_ADDRESS" and fullpath of PKCS12 private key in yaml.
|
29
|
+
1. Generate a private key in P12 (PKCS12) format or a JSON key, and upload it to the machine.
|
32
30
|
|
33
31
|
### run
|
34
32
|
|
@@ -40,9 +38,10 @@ embulk run /path/to/config.yml
|
|
40
38
|
|
41
39
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
42
40
|
- **path_prefix** prefix of target keys (string, required)
|
43
|
-
- **auth_method** (string, optional, "private_key" or "compute_engine". default value is "private_key")
|
44
|
-
- **service_account_email** Google Cloud Storage service_account_email (string, required)
|
45
|
-
- **
|
41
|
+
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
42
|
+
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
43
|
+
- **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
|
44
|
+
- **json_keyfile** fullpath of json_key (string, required when auth_method is json_key)
|
46
45
|
- **application_name** application name anything you like (string, optional)
|
47
46
|
|
48
47
|
## Example
|
@@ -54,7 +53,7 @@ in:
|
|
54
53
|
path_prefix: logs/csv-
|
55
54
|
auth_method: private_key #default
|
56
55
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
57
|
-
|
56
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
58
57
|
application_name: Anything you like
|
59
58
|
```
|
60
59
|
|
@@ -67,7 +66,7 @@ in:
|
|
67
66
|
path_prefix: sample_
|
68
67
|
auth_method: private_key #default
|
69
68
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
70
|
-
|
69
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
71
70
|
application_name: Anything you like
|
72
71
|
decoders:
|
73
72
|
- {type: gzip}
|
@@ -89,23 +88,61 @@ out: {type: stdout}
|
|
89
88
|
|
90
89
|
## Authentication
|
91
90
|
|
92
|
-
There are
|
91
|
+
Three methods are supported for fetching an access token for the service account.
|
92
|
+
|
93
|
+
1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
|
94
|
+
2. JSON key of GCP(Google Cloud Platform)'s service account
|
95
|
+
3. Pre-defined access token (Google Compute Engine only)
|
96
|
+
|
97
|
+
### Public-Private key pair of GCP's service account
|
98
|
+
|
99
|
+
You first need to create a service account (client ID), download its private key and deploy the key with embulk.
|
100
|
+
|
101
|
+
```yaml
|
102
|
+
in:
|
103
|
+
type: gcs
|
104
|
+
auth_method: private_key
|
105
|
+
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
106
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
107
|
+
```
|
93
108
|
|
94
|
-
|
95
|
-
2. Pre-defined access token (Compute Engine only)
|
109
|
+
### JSON key of GCP's service account
|
96
110
|
|
97
|
-
|
98
|
-
|
111
|
+
You first need to create a service account (client ID), download its json key and deploy the key with embulk.
|
112
|
+
|
113
|
+
```yaml
|
114
|
+
in:
|
115
|
+
type: gcs
|
116
|
+
auth_method: json_key
|
117
|
+
json_keyfile: /path/to/json_keyfile.json
|
118
|
+
```
|
119
|
+
|
120
|
+
You can also embed the contents of the JSON key file directly in config.yml.
|
121
|
+
|
122
|
+
```yaml
|
123
|
+
in:
|
124
|
+
type: gcs
|
125
|
+
auth_method: json_key
|
126
|
+
json_keyfile:
|
127
|
+
content: |
|
128
|
+
{
|
129
|
+
"private_key_id": "123456789",
|
130
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
|
131
|
+
"client_email": "..."
|
132
|
+
}
|
133
|
+
```
|
134
|
+
|
135
|
+
### Pre-defined access token (GCE only)
|
99
136
|
|
100
137
|
On the other hand, you don't need to explicitly create a service account for embulk when you
|
101
|
-
run embulk in Google Compute Engine. In this
|
102
|
-
add the API scope "https://www.googleapis.com/auth/
|
103
|
-
Compute Engine instance, then you can configure embulk like this.
|
138
|
+
run embulk in Google Compute Engine. In this third authentication method, you need to
|
139
|
+
add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
|
140
|
+
Compute Engine VM instance, then you can configure embulk like this.
|
104
141
|
|
105
142
|
[Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
|
106
143
|
|
107
144
|
```yaml
|
108
|
-
|
145
|
+
in:
|
109
146
|
type: gcs
|
110
147
|
auth_method: compute_engine
|
111
148
|
```
|
data/build.gradle
CHANGED
Binary file
|
@@ -1,11 +1,13 @@
|
|
1
1
|
package org.embulk.input.gcs;
|
2
2
|
|
3
3
|
import java.io.File;
|
4
|
+
import java.io.FileInputStream;
|
4
5
|
import java.io.IOException;
|
5
6
|
|
6
7
|
import com.google.common.base.Optional;
|
7
8
|
import com.google.common.collect.ImmutableList;
|
8
9
|
import java.security.GeneralSecurityException;
|
10
|
+
import java.util.Collections;
|
9
11
|
|
10
12
|
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
11
13
|
import com.google.api.client.googleapis.compute.ComputeCredential;
|
@@ -26,16 +28,19 @@ public class GcsAuthentication
|
|
26
28
|
private final Logger log = Exec.getLogger(GcsAuthentication.class);
|
27
29
|
private final Optional<String> serviceAccountEmail;
|
28
30
|
private final Optional<String> p12KeyFilePath;
|
31
|
+
private final Optional<String> jsonKeyFilePath;
|
29
32
|
private final String applicationName;
|
30
33
|
private final HttpTransport httpTransport;
|
31
34
|
private final JsonFactory jsonFactory;
|
32
35
|
private final HttpRequestInitializer credentials;
|
33
36
|
|
34
|
-
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail,
|
37
|
+
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail,
|
38
|
+
Optional<String> p12KeyFilePath, Optional<String> jsonKeyFilePath, String applicationName)
|
35
39
|
throws IOException, GeneralSecurityException
|
36
40
|
{
|
37
41
|
this.serviceAccountEmail = serviceAccountEmail;
|
38
42
|
this.p12KeyFilePath = p12KeyFilePath;
|
43
|
+
this.jsonKeyFilePath = jsonKeyFilePath;
|
39
44
|
this.applicationName = applicationName;
|
40
45
|
|
41
46
|
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
@@ -43,6 +48,8 @@ public class GcsAuthentication
|
|
43
48
|
|
44
49
|
if (authMethod.equals("compute_engine")) {
|
45
50
|
this.credentials = getComputeCredential();
|
51
|
+
} else if(authMethod.toLowerCase().equals("json_key")) {
|
52
|
+
this.credentials = getServiceAccountCredentialFromJsonFile();
|
46
53
|
} else {
|
47
54
|
this.credentials = getServiceAccountCredential();
|
48
55
|
}
|
@@ -69,6 +76,14 @@ public class GcsAuthentication
|
|
69
76
|
.build();
|
70
77
|
}
|
71
78
|
|
79
|
+
private GoogleCredential getServiceAccountCredentialFromJsonFile() throws IOException
|
80
|
+
{
|
81
|
+
FileInputStream stream = new FileInputStream(jsonKeyFilePath.orNull());
|
82
|
+
|
83
|
+
return GoogleCredential.fromStream(stream, httpTransport, jsonFactory)
|
84
|
+
.createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_ONLY));
|
85
|
+
}
|
86
|
+
|
72
87
|
/**
|
73
88
|
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#creatinganaccount
|
74
89
|
* @see https://developers.google.com/accounts/docs/OAuth2
|
@@ -6,8 +6,11 @@ import java.util.Collections;
|
|
6
6
|
import java.io.IOException;
|
7
7
|
import java.io.InputStream;
|
8
8
|
import java.math.BigInteger;
|
9
|
+
|
9
10
|
import com.google.common.collect.ImmutableList;
|
10
11
|
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.base.Function;
|
13
|
+
import com.google.common.base.Throwables;
|
11
14
|
import java.security.GeneralSecurityException;
|
12
15
|
|
13
16
|
import org.embulk.config.TaskReport;
|
@@ -23,6 +26,7 @@ import org.embulk.spi.Exec;
|
|
23
26
|
import org.embulk.spi.BufferAllocator;
|
24
27
|
import org.embulk.spi.FileInputPlugin;
|
25
28
|
import org.embulk.spi.TransactionalFileInput;
|
29
|
+
import org.embulk.spi.unit.LocalFile;
|
26
30
|
import org.embulk.spi.util.InputStreamFileInput;
|
27
31
|
|
28
32
|
import org.slf4j.Logger;
|
@@ -60,10 +64,20 @@ public class GcsFileInputPlugin
|
|
60
64
|
@ConfigDefault("\"Embulk GCS input plugin\"")
|
61
65
|
String getApplicationName();
|
62
66
|
|
67
|
+
// kept for backward compatibility
|
63
68
|
@Config("p12_keyfile_fullpath")
|
64
69
|
@ConfigDefault("null")
|
65
70
|
Optional<String> getP12KeyfileFullpath();
|
66
71
|
|
72
|
+
@Config("p12_keyfile")
|
73
|
+
@ConfigDefault("null")
|
74
|
+
Optional<LocalFile> getP12Keyfile();
|
75
|
+
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
76
|
+
|
77
|
+
@Config("json_keyfile")
|
78
|
+
@ConfigDefault("null")
|
79
|
+
Optional<LocalFile> getJsonKeyfile();
|
80
|
+
|
67
81
|
List<String> getFiles();
|
68
82
|
void setFiles(List<String> files);
|
69
83
|
|
@@ -72,6 +86,7 @@ public class GcsFileInputPlugin
|
|
72
86
|
}
|
73
87
|
|
74
88
|
private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
|
89
|
+
private static GcsAuthentication auth;
|
75
90
|
|
76
91
|
@Override
|
77
92
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -79,6 +94,39 @@ public class GcsFileInputPlugin
|
|
79
94
|
{
|
80
95
|
PluginTask task = config.loadConfig(PluginTask.class);
|
81
96
|
|
97
|
+
if (task.getP12KeyfileFullpath().isPresent()) {
|
98
|
+
if (task.getP12Keyfile().isPresent()) {
|
99
|
+
throw new ConfigException("Setting both p12_keyfile_fullpath and p12_keyfile is invalid");
|
100
|
+
}
|
101
|
+
try {
|
102
|
+
task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfileFullpath().get())));
|
103
|
+
} catch (IOException ex) {
|
104
|
+
throw Throwables.propagate(ex);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
if (task.getAuthMethod().getString().equals("json_key")) {
|
109
|
+
if (!task.getJsonKeyfile().isPresent()) {
|
110
|
+
throw new ConfigException("If auth_method is json_key, you have to set json_keyfile");
|
111
|
+
}
|
112
|
+
} else if (task.getAuthMethod().getString().equals("private_key")) {
|
113
|
+
if (!task.getP12Keyfile().isPresent() || !task.getServiceAccountEmail().isPresent()) {
|
114
|
+
throw new ConfigException("If auth_method is private_key, you have to set both service_account_email and p12_keyfile");
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
try {
|
119
|
+
auth = new GcsAuthentication(
|
120
|
+
task.getAuthMethod().getString(),
|
121
|
+
task.getServiceAccountEmail(),
|
122
|
+
task.getP12Keyfile().transform(localFileToPathString()),
|
123
|
+
task.getJsonKeyfile().transform(localFileToPathString()),
|
124
|
+
task.getApplicationName()
|
125
|
+
);
|
126
|
+
} catch (GeneralSecurityException | IOException ex) {
|
127
|
+
throw new ConfigException(ex);
|
128
|
+
}
|
129
|
+
|
82
130
|
// list files recursively
|
83
131
|
task.setFiles(listFiles(task));
|
84
132
|
// number of processors is same with number of files
|
@@ -117,18 +165,29 @@ public class GcsFileInputPlugin
|
|
117
165
|
{
|
118
166
|
}
|
119
167
|
|
120
|
-
private static Storage newGcsClient(final PluginTask task)
|
168
|
+
private static Storage newGcsClient(final PluginTask task)
|
169
|
+
{
|
121
170
|
Storage client = null;
|
122
171
|
try {
|
123
|
-
GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
|
124
172
|
client = auth.getGcsClient(task.getBucket());
|
125
|
-
} catch (
|
173
|
+
} catch (IOException ex) {
|
126
174
|
throw new ConfigException(ex);
|
127
175
|
}
|
128
176
|
|
129
177
|
return client;
|
130
178
|
}
|
131
179
|
|
180
|
+
private Function<LocalFile, String> localFileToPathString()
|
181
|
+
{
|
182
|
+
return new Function<LocalFile, String>()
|
183
|
+
{
|
184
|
+
public String apply(LocalFile file)
|
185
|
+
{
|
186
|
+
return file.getPath().toString();
|
187
|
+
}
|
188
|
+
};
|
189
|
+
}
|
190
|
+
|
132
191
|
public List<String> listFiles(PluginTask task)
|
133
192
|
{
|
134
193
|
Storage client = newGcsClient(task);
|
@@ -259,7 +318,8 @@ public class GcsFileInputPlugin
|
|
259
318
|
public enum AuthMethod
|
260
319
|
{
|
261
320
|
private_key("private_key"),
|
262
|
-
compute_engine("compute_engine")
|
321
|
+
compute_engine("compute_engine"),
|
322
|
+
json_key("json_key");
|
263
323
|
|
264
324
|
private final String string;
|
265
325
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- ChangeLog
|
49
50
|
- README.md
|
50
51
|
- build.gradle
|
51
52
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -59,7 +60,7 @@ files:
|
|
59
60
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
60
61
|
- classpath/commons-codec-1.3.jar
|
61
62
|
- classpath/commons-logging-1.1.1.jar
|
62
|
-
- classpath/embulk-input-gcs-0.1.
|
63
|
+
- classpath/embulk-input-gcs-0.1.7.jar
|
63
64
|
- classpath/google-api-client-1.19.1.jar
|
64
65
|
- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
|
65
66
|
- classpath/google-http-client-1.19.0.jar
|
Binary file
|