embulk-input-gcs 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +22 -0
- data/README.md +55 -18
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.1.7.jar +0 -0
- data/src/main/java/org/embulk/input/gcs/GcsAuthentication.java +16 -1
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +64 -4
- metadata +4 -3
- data/classpath/embulk-input-gcs-0.1.6.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24b44f4db77ef29422e03052b58cdf67378d5a79
|
4
|
+
data.tar.gz: 031a0686510df86a6b87780aa0da707cd7cd64cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb5815752627aeef5c32de944d23e78c03313970e574f61e3706808bb4314643311e4d581ae2fc7589ede24e80ff53236cdd0defecbd647a2d382d26b3630dcc
|
7
|
+
data.tar.gz: 5ce214a4e252f78bbc9a9215d3edcc0de671220e2f5f9697687e4b4ac0d9d05303a5a724f399ad1a8175a8a8d85bd50d977fc01af8a718e74cb50981e34b3b6d
|
data/ChangeLog
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Release 0.1.7 - 2015-10-06
|
2
|
+
|
3
|
+
* Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account
|
4
|
+
* Supported mapreduce-executor
|
5
|
+
|
6
|
+
Release 0.1.6 - 2015-09-05
|
7
|
+
|
8
|
+
* Added new auth method - pre-defined access token of GCE(Google Compute Engine)
|
9
|
+
|
10
|
+
Release 0.1.5 - 2015-08-19
|
11
|
+
|
12
|
+
* Upgraded embulk version to 0.7.0
|
13
|
+
* Refactored
|
14
|
+
|
15
|
+
Release 0.1.4 - 2015-06-27
|
16
|
+
|
17
|
+
* Keep the last last_path when input files are empty. @frsyuki thanks!
|
18
|
+
* Refactored error handling logics.
|
19
|
+
|
20
|
+
Release 0.1.3 - 2015-03-16
|
21
|
+
|
22
|
+
* Changed supported Java version from 8 to 7
|
data/README.md
CHANGED
@@ -16,7 +16,7 @@ embulk gem install embulk-input-gcs
|
|
16
16
|
|
17
17
|
### Google Service Account Settings
|
18
18
|
|
19
|
-
If you chose "private_key" as [auth_method](#Authentication), you can get service_account_email and private_key like below.
|
19
|
+
If you choose "private_key" or "json_key" as [auth_method](#Authentication), you can obtain the service_account_email and the private key (or JSON key) as described below.
|
20
20
|
|
21
21
|
1. Make project at [Google Developers Console](https://console.developers.google.com/project).
|
22
22
|
|
@@ -26,9 +26,7 @@ If you chose "private_key" as [auth_method](#Authentication), you can get servic
|
|
26
26
|
|
27
27
|
embulk-input-gcs can run "read-only" scopes.
|
28
28
|
|
29
|
-
1. Generate private key in P12(PKCS12) format, and upload to machine.
|
30
|
-
|
31
|
-
1. Write "EMAIL_ADDRESS" and fullpath of PKCS12 private key in yaml.
|
29
|
+
1. Generate a private key in P12 (PKCS12) or JSON format, and upload it to the machine.
|
32
30
|
|
33
31
|
### run
|
34
32
|
|
@@ -40,9 +38,10 @@ embulk run /path/to/config.yml
|
|
40
38
|
|
41
39
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
42
40
|
- **path_prefix** prefix of target keys (string, required)
|
43
|
-
- **auth_method** (string, optional, "private_key" or "compute_engine". default value is "private_key")
|
44
|
-
- **service_account_email** Google Cloud Storage service_account_email (string, required)
|
45
|
-
- **
|
41
|
+
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
42
|
+
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
43
|
+
- **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
|
44
|
+
- **json_keyfile** fullpath of json_key (string, required when auth_method is json_key)
|
46
45
|
- **application_name** application name anything you like (string, optional)
|
47
46
|
|
48
47
|
## Example
|
@@ -54,7 +53,7 @@ in:
|
|
54
53
|
path_prefix: logs/csv-
|
55
54
|
auth_method: private_key #default
|
56
55
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
57
|
-
|
56
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
58
57
|
application_name: Anything you like
|
59
58
|
```
|
60
59
|
|
@@ -67,7 +66,7 @@ in:
|
|
67
66
|
path_prefix: sample_
|
68
67
|
auth_method: private_key #default
|
69
68
|
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
70
|
-
|
69
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
71
70
|
application_name: Anything you like
|
72
71
|
decoders:
|
73
72
|
- {type: gzip}
|
@@ -89,23 +88,61 @@ out: {type: stdout}
|
|
89
88
|
|
90
89
|
## Authentication
|
91
90
|
|
92
|
-
There are
|
91
|
+
Three methods are supported for fetching an access token for the service account.
|
92
|
+
|
93
|
+
1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
|
94
|
+
2. JSON key of GCP(Google Cloud Platform)'s service account
|
95
|
+
3. Pre-defined access token (Google Compute Engine only)
|
96
|
+
|
97
|
+
### Public-Private key pair of GCP's service account
|
98
|
+
|
99
|
+
You first need to create a service account (client ID), download its private key and deploy the key with embulk.
|
100
|
+
|
101
|
+
```yaml
|
102
|
+
in:
|
103
|
+
type: gcs
|
104
|
+
auth_method: private_key
|
105
|
+
service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
|
106
|
+
p12_keyfile: /path/to/p12_keyfile.p12
|
107
|
+
```
|
93
108
|
|
94
|
-
|
95
|
-
2. Pre-defined access token (Compute Engine only)
|
109
|
+
### JSON key of GCP's service account
|
96
110
|
|
97
|
-
|
98
|
-
|
111
|
+
You first need to create a service account (client ID), download its json key and deploy the key with embulk.
|
112
|
+
|
113
|
+
```yaml
|
114
|
+
in:
|
115
|
+
type: gcs
|
116
|
+
auth_method: json_key
|
117
|
+
json_keyfile: /path/to/json_keyfile.json
|
118
|
+
```
|
119
|
+
|
120
|
+
You can also embed the contents of json_keyfile in config.yml.
|
121
|
+
|
122
|
+
```yaml
|
123
|
+
in:
|
124
|
+
type: gcs
|
125
|
+
auth_method: json_key
|
126
|
+
json_keyfile:
|
127
|
+
content: |
|
128
|
+
{
|
129
|
+
"private_key_id": "123456789",
|
130
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
|
131
|
+
"client_email": "..."
|
132
|
+
}
|
133
|
+
```
|
134
|
+
|
135
|
+
### Pre-defined access token(GCE only)
|
99
136
|
|
100
137
|
On the other hand, you don't need to explicitly create a service account for embulk when you
|
101
|
-
run embulk in Google Compute Engine. In this
|
102
|
-
add the API scope "https://www.googleapis.com/auth/
|
103
|
-
Compute Engine instance, then you can configure embulk like this.
|
138
|
+
run embulk in Google Compute Engine. In this third authentication method, you need to
|
139
|
+
add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
|
140
|
+
Compute Engine VM instance, then you can configure embulk like this.
|
104
141
|
|
105
142
|
[Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
|
106
143
|
|
107
144
|
```yaml
|
108
|
-
|
145
|
+
in:
|
109
146
|
type: gcs
|
110
147
|
auth_method: compute_engine
|
111
148
|
```
|
data/build.gradle
CHANGED
Binary file
|
@@ -1,11 +1,13 @@
|
|
1
1
|
package org.embulk.input.gcs;
|
2
2
|
|
3
3
|
import java.io.File;
|
4
|
+
import java.io.FileInputStream;
|
4
5
|
import java.io.IOException;
|
5
6
|
|
6
7
|
import com.google.common.base.Optional;
|
7
8
|
import com.google.common.collect.ImmutableList;
|
8
9
|
import java.security.GeneralSecurityException;
|
10
|
+
import java.util.Collections;
|
9
11
|
|
10
12
|
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
11
13
|
import com.google.api.client.googleapis.compute.ComputeCredential;
|
@@ -26,16 +28,19 @@ public class GcsAuthentication
|
|
26
28
|
private final Logger log = Exec.getLogger(GcsAuthentication.class);
|
27
29
|
private final Optional<String> serviceAccountEmail;
|
28
30
|
private final Optional<String> p12KeyFilePath;
|
31
|
+
private final Optional<String> jsonKeyFilePath;
|
29
32
|
private final String applicationName;
|
30
33
|
private final HttpTransport httpTransport;
|
31
34
|
private final JsonFactory jsonFactory;
|
32
35
|
private final HttpRequestInitializer credentials;
|
33
36
|
|
34
|
-
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail,
|
37
|
+
public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail,
|
38
|
+
Optional<String> p12KeyFilePath, Optional<String> jsonKeyFilePath, String applicationName)
|
35
39
|
throws IOException, GeneralSecurityException
|
36
40
|
{
|
37
41
|
this.serviceAccountEmail = serviceAccountEmail;
|
38
42
|
this.p12KeyFilePath = p12KeyFilePath;
|
43
|
+
this.jsonKeyFilePath = jsonKeyFilePath;
|
39
44
|
this.applicationName = applicationName;
|
40
45
|
|
41
46
|
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
@@ -43,6 +48,8 @@ public class GcsAuthentication
|
|
43
48
|
|
44
49
|
if (authMethod.equals("compute_engine")) {
|
45
50
|
this.credentials = getComputeCredential();
|
51
|
+
} else if(authMethod.toLowerCase().equals("json_key")) {
|
52
|
+
this.credentials = getServiceAccountCredentialFromJsonFile();
|
46
53
|
} else {
|
47
54
|
this.credentials = getServiceAccountCredential();
|
48
55
|
}
|
@@ -69,6 +76,14 @@ public class GcsAuthentication
|
|
69
76
|
.build();
|
70
77
|
}
|
71
78
|
|
79
|
+
private GoogleCredential getServiceAccountCredentialFromJsonFile() throws IOException
|
80
|
+
{
|
81
|
+
FileInputStream stream = new FileInputStream(jsonKeyFilePath.orNull());
|
82
|
+
|
83
|
+
return GoogleCredential.fromStream(stream, httpTransport, jsonFactory)
|
84
|
+
.createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_ONLY));
|
85
|
+
}
|
86
|
+
|
72
87
|
/**
|
73
88
|
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#creatinganaccount
|
74
89
|
* @see https://developers.google.com/accounts/docs/OAuth2
|
@@ -6,8 +6,11 @@ import java.util.Collections;
|
|
6
6
|
import java.io.IOException;
|
7
7
|
import java.io.InputStream;
|
8
8
|
import java.math.BigInteger;
|
9
|
+
|
9
10
|
import com.google.common.collect.ImmutableList;
|
10
11
|
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.base.Function;
|
13
|
+
import com.google.common.base.Throwables;
|
11
14
|
import java.security.GeneralSecurityException;
|
12
15
|
|
13
16
|
import org.embulk.config.TaskReport;
|
@@ -23,6 +26,7 @@ import org.embulk.spi.Exec;
|
|
23
26
|
import org.embulk.spi.BufferAllocator;
|
24
27
|
import org.embulk.spi.FileInputPlugin;
|
25
28
|
import org.embulk.spi.TransactionalFileInput;
|
29
|
+
import org.embulk.spi.unit.LocalFile;
|
26
30
|
import org.embulk.spi.util.InputStreamFileInput;
|
27
31
|
|
28
32
|
import org.slf4j.Logger;
|
@@ -60,10 +64,20 @@ public class GcsFileInputPlugin
|
|
60
64
|
@ConfigDefault("\"Embulk GCS input plugin\"")
|
61
65
|
String getApplicationName();
|
62
66
|
|
67
|
+
// kept for backward compatibility
|
63
68
|
@Config("p12_keyfile_fullpath")
|
64
69
|
@ConfigDefault("null")
|
65
70
|
Optional<String> getP12KeyfileFullpath();
|
66
71
|
|
72
|
+
@Config("p12_keyfile")
|
73
|
+
@ConfigDefault("null")
|
74
|
+
Optional<LocalFile> getP12Keyfile();
|
75
|
+
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
76
|
+
|
77
|
+
@Config("json_keyfile")
|
78
|
+
@ConfigDefault("null")
|
79
|
+
Optional<LocalFile> getJsonKeyfile();
|
80
|
+
|
67
81
|
List<String> getFiles();
|
68
82
|
void setFiles(List<String> files);
|
69
83
|
|
@@ -72,6 +86,7 @@ public class GcsFileInputPlugin
|
|
72
86
|
}
|
73
87
|
|
74
88
|
private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
|
89
|
+
private static GcsAuthentication auth;
|
75
90
|
|
76
91
|
@Override
|
77
92
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -79,6 +94,39 @@ public class GcsFileInputPlugin
|
|
79
94
|
{
|
80
95
|
PluginTask task = config.loadConfig(PluginTask.class);
|
81
96
|
|
97
|
+
if (task.getP12KeyfileFullpath().isPresent()) {
|
98
|
+
if (task.getP12Keyfile().isPresent()) {
|
99
|
+
throw new ConfigException("Setting both p12_keyfile_fullpath and p12_keyfile is invalid");
|
100
|
+
}
|
101
|
+
try {
|
102
|
+
task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfileFullpath().get())));
|
103
|
+
} catch (IOException ex) {
|
104
|
+
throw Throwables.propagate(ex);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
if (task.getAuthMethod().getString().equals("json_key")) {
|
109
|
+
if (!task.getJsonKeyfile().isPresent()) {
|
110
|
+
throw new ConfigException("If auth_method is json_key, you have to set json_keyfile");
|
111
|
+
}
|
112
|
+
} else if (task.getAuthMethod().getString().equals("private_key")) {
|
113
|
+
if (!task.getP12Keyfile().isPresent() || !task.getServiceAccountEmail().isPresent()) {
|
114
|
+
throw new ConfigException("If auth_method is private_key, you have to set both service_account_email and p12_keyfile");
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
try {
|
119
|
+
auth = new GcsAuthentication(
|
120
|
+
task.getAuthMethod().getString(),
|
121
|
+
task.getServiceAccountEmail(),
|
122
|
+
task.getP12Keyfile().transform(localFileToPathString()),
|
123
|
+
task.getJsonKeyfile().transform(localFileToPathString()),
|
124
|
+
task.getApplicationName()
|
125
|
+
);
|
126
|
+
} catch (GeneralSecurityException | IOException ex) {
|
127
|
+
throw new ConfigException(ex);
|
128
|
+
}
|
129
|
+
|
82
130
|
// list files recursively
|
83
131
|
task.setFiles(listFiles(task));
|
84
132
|
// number of processors is same with number of files
|
@@ -117,18 +165,29 @@ public class GcsFileInputPlugin
|
|
117
165
|
{
|
118
166
|
}
|
119
167
|
|
120
|
-
private static Storage newGcsClient(final PluginTask task)
|
168
|
+
private static Storage newGcsClient(final PluginTask task)
|
169
|
+
{
|
121
170
|
Storage client = null;
|
122
171
|
try {
|
123
|
-
GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
|
124
172
|
client = auth.getGcsClient(task.getBucket());
|
125
|
-
} catch (
|
173
|
+
} catch (IOException ex) {
|
126
174
|
throw new ConfigException(ex);
|
127
175
|
}
|
128
176
|
|
129
177
|
return client;
|
130
178
|
}
|
131
179
|
|
180
|
+
private Function<LocalFile, String> localFileToPathString()
|
181
|
+
{
|
182
|
+
return new Function<LocalFile, String>()
|
183
|
+
{
|
184
|
+
public String apply(LocalFile file)
|
185
|
+
{
|
186
|
+
return file.getPath().toString();
|
187
|
+
}
|
188
|
+
};
|
189
|
+
}
|
190
|
+
|
132
191
|
public List<String> listFiles(PluginTask task)
|
133
192
|
{
|
134
193
|
Storage client = newGcsClient(task);
|
@@ -259,7 +318,8 @@ public class GcsFileInputPlugin
|
|
259
318
|
public enum AuthMethod
|
260
319
|
{
|
261
320
|
private_key("private_key"),
|
262
|
-
compute_engine("compute_engine")
|
321
|
+
compute_engine("compute_engine"),
|
322
|
+
json_key("json_key");
|
263
323
|
|
264
324
|
private final String string;
|
265
325
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- ChangeLog
|
49
50
|
- README.md
|
50
51
|
- build.gradle
|
51
52
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -59,7 +60,7 @@ files:
|
|
59
60
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
60
61
|
- classpath/commons-codec-1.3.jar
|
61
62
|
- classpath/commons-logging-1.1.1.jar
|
62
|
-
- classpath/embulk-input-gcs-0.1.
|
63
|
+
- classpath/embulk-input-gcs-0.1.7.jar
|
63
64
|
- classpath/google-api-client-1.19.1.jar
|
64
65
|
- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
|
65
66
|
- classpath/google-http-client-1.19.0.jar
|
Binary file
|