embulk-input-gcs 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa681f0822424f0c593fd882cb028f6d1ffd1084
4
- data.tar.gz: 1f715e779e7997e3ebb52e72c3775f12aeb1c949
3
+ metadata.gz: c1337868e0a547834610ae572d49228a3394aff7
4
+ data.tar.gz: 52e83dd8d9b91eb7ca715ef36b3d74f8241e2860
5
5
  SHA512:
6
- metadata.gz: 45ee5d75433ded0336ff43fba3c0cb2091ff63d67cf5ed6b53ce9dca642ae6663f5147e7bf3c395bd982e5bfc1f858f721c8df023ab90559dec88b710ce0478f
7
- data.tar.gz: 78cba836847ea4f5ece8978b90130bdcab2bd31fe3bd7542da9f19bddda85ba2a286a878ddc7b4dd8d96828333386c824be51f84fc6d1c4dc54aeb2391991826
6
+ metadata.gz: b8980edca632680de44f5311b3e2a3c622d94e3ee15f76eade05a536eb96cf64397e8916b4b47516a0724b9a2d2b03666e78ff2bc9bdee60352a6714e3d3383e
7
+ data.tar.gz: 8ad0624eb9576e37dbeac7dc2eead16bc72bfae8f51274d81042f0657821024ea2595ccd71f637fa9c63af049dac6bdd61fc9fe5df24ec8d1b36ee3bfff85a56
data/README.md CHANGED
@@ -15,13 +15,16 @@ embulk gem install embulk-input-gcs
15
15
  ```
16
16
 
17
17
  ### Google Service Account Settings
18
+
19
+ If you choose "private_key" as the [auth_method](#authentication), you can obtain service_account_email and the private key as described below.
20
+
18
21
  1. Make project at [Google Developers Console](https://console.developers.google.com/project).
19
22
 
20
23
  1. Make "Service Account" with [this step](https://cloud.google.com/storage/docs/authentication#service_accounts).
21
-
22
- Service Account has two specific scopes: read-only, read-write.
23
-
24
- embulk-input-gcs can run "read-only" scopes.
24
+
25
+ Service Account has two specific scopes: read-only, read-write.
26
+
27
+ embulk-input-gcs can run with the "read-only" scope.
25
28
 
26
29
  1. Generate private key in P12(PKCS12) format, and upload to machine.
27
30
 
@@ -37,6 +40,7 @@ embulk run /path/to/config.yml
37
40
 
38
41
  - **bucket** Google Cloud Storage bucket name (string, required)
39
42
  - **path_prefix** prefix of target keys (string, required)
43
+ - **auth_method** authentication method, either "private_key" or "compute_engine" (string, optional, default: "private_key")
40
44
  - **service_account_email** Google Cloud Storage service_account_email (string, required)
41
45
  - **p12_keyfile_fullpath** fullpath of p12 key (string, required)
42
46
  - **application_name** application name anything you like (string, optional)
@@ -48,6 +52,7 @@ in:
48
52
  type: gcs
49
53
  bucket: my-gcs-bucket
50
54
  path_prefix: logs/csv-
55
+ auth_method: private_key #default
51
56
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
52
57
  p12_keyfile_path: /path/to/p12_keyfile.p12
53
58
  application_name: Anything you like
@@ -60,6 +65,7 @@ in:
60
65
  type: gcs
61
66
  bucket: my-gcs-bucket
62
67
  path_prefix: sample_
68
+ auth_method: private_key #default
63
69
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
64
70
  p12_keyfile_path: /path/to/p12_keyfile.p12
65
71
  application_name: Anything you like
@@ -81,6 +87,29 @@ in:
81
87
  out: {type: stdout}
82
88
  ```
83
89
 
90
+ ## Authentication
91
+
92
+ Two methods are supported for fetching an access token for the service account.
93
+
94
+ 1. Public-Private key pair
95
+ 2. Pre-defined access token (Compute Engine only)
96
+
97
+ The examples above use the first one. You first need to create a service account (client ID),
98
+ download its private key and deploy the key with embulk.
99
+
100
+ On the other hand, you don't need to explicitly create a service account for embulk when you
101
+ run embulk in Google Compute Engine. In this second authentication method, you need to
102
+ add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
103
+ Compute Engine instance, then you can configure embulk like this.
104
+
105
+ [Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
106
+
107
+ ```yaml
108
+ in:
109
+ type: gcs
110
+ auth_method: compute_engine
111
+ ```
112
+
84
113
  ## Build
85
114
 
86
115
  ```
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  sourceCompatibility = 1.7
16
16
  targetCompatibility = 1.7
17
17
 
18
- version = "0.1.5"
18
+ version = "0.1.6"
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.7.1"
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.version = "${project.version}"
47
47
  spec.authors = ["Satoshi Akama"]
48
48
  spec.summary = %[Google Cloud Storage input plugin for Embulk]
49
- spec.description = %[Reads files stored on Google Cloud Storage (Standard/Durable Reduced Availability/Nearline)]
49
+ spec.description = %[Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)]
50
50
  spec.email = ["satoshiakama@gmail.com"]
51
51
  spec.licenses = ["Apache-2.0"]
52
52
  spec.homepage = "https://github.com/embulk/embulk-input-gcs"
@@ -0,0 +1,97 @@
1
+ package org.embulk.input.gcs;
2
+
3
+ import java.io.File;
4
+ import java.io.IOException;
5
+
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.collect.ImmutableList;
8
+ import java.security.GeneralSecurityException;
9
+
10
+ import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
11
+ import com.google.api.client.googleapis.compute.ComputeCredential;
12
+ import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
13
+ import com.google.api.client.http.HttpTransport;
14
+ import com.google.api.client.json.JsonFactory;
15
+ import com.google.api.client.json.jackson2.JacksonFactory;
16
+ import com.google.api.client.http.HttpRequestInitializer;
17
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
18
+ import com.google.api.services.storage.Storage;
19
+ import com.google.api.services.storage.StorageScopes;
20
+ import com.google.api.services.storage.model.Objects;
21
+ import org.embulk.spi.Exec;
22
+ import org.slf4j.Logger;
23
+
24
+ public class GcsAuthentication
25
+ {
26
+ private final Logger log = Exec.getLogger(GcsAuthentication.class);
27
+ private final Optional<String> serviceAccountEmail;
28
+ private final Optional<String> p12KeyFilePath;
29
+ private final String applicationName;
30
+ private final HttpTransport httpTransport;
31
+ private final JsonFactory jsonFactory;
32
+ private final HttpRequestInitializer credentials;
33
+
34
+ public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
35
+ throws IOException, GeneralSecurityException
36
+ {
37
+ this.serviceAccountEmail = serviceAccountEmail;
38
+ this.p12KeyFilePath = p12KeyFilePath;
39
+ this.applicationName = applicationName;
40
+
41
+ this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
42
+ this.jsonFactory = new JacksonFactory();
43
+
44
+ if (authMethod.equals("compute_engine")) {
45
+ this.credentials = getComputeCredential();
46
+ } else {
47
+ this.credentials = getServiceAccountCredential();
48
+ }
49
+ }
50
+
51
+ /**
52
+ * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
53
+ */
54
+ private GoogleCredential getServiceAccountCredential() throws IOException, GeneralSecurityException
55
+ {
56
+ // @see https://cloud.google.com/compute/docs/api/how-tos/authorization
57
+ // @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
58
+ // @see https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/BigqueryScopes.html
59
+ return new GoogleCredential.Builder()
60
+ .setTransport(httpTransport)
61
+ .setJsonFactory(jsonFactory)
62
+ .setServiceAccountId(serviceAccountEmail.orNull())
63
+ .setServiceAccountScopes(
64
+ ImmutableList.of(
65
+ StorageScopes.DEVSTORAGE_READ_ONLY
66
+ )
67
+ )
68
+ .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
69
+ .build();
70
+ }
71
+
72
+ /**
73
+ * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#creatinganaccount
74
+ * @see https://developers.google.com/accounts/docs/OAuth2
75
+ */
76
+ private ComputeCredential getComputeCredential() throws IOException
77
+ {
78
+ ComputeCredential credential = new ComputeCredential.Builder(httpTransport, jsonFactory)
79
+ .build();
80
+ credential.refreshToken();
81
+
82
+ return credential;
83
+ }
84
+
85
+ public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
86
+ {
87
+ Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
88
+ .setApplicationName(applicationName)
89
+ .build();
90
+
91
+ // Issue a minimal list request so that an IOException is thrown here if authentication fails.
92
+ long maxResults = 1;
93
+ Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
94
+
95
+ return client;
96
+ }
97
+ }
@@ -1,10 +1,8 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
3
  import java.util.List;
4
- import java.util.Arrays;
5
4
  import java.util.ArrayList;
6
5
  import java.util.Collections;
7
- import java.io.File;
8
6
  import java.io.IOException;
9
7
  import java.io.InputStream;
10
8
  import java.math.BigInteger;
@@ -17,10 +15,10 @@ import org.embulk.config.Config;
17
15
  import org.embulk.config.ConfigInject;
18
16
  import org.embulk.config.ConfigDiff;
19
17
  import org.embulk.config.ConfigDefault;
18
+ import org.embulk.config.ConfigException;
20
19
  import org.embulk.config.ConfigSource;
21
20
  import org.embulk.config.Task;
22
21
  import org.embulk.config.TaskSource;
23
- import org.embulk.config.TaskReport;
24
22
  import org.embulk.spi.Exec;
25
23
  import org.embulk.spi.BufferAllocator;
26
24
  import org.embulk.spi.FileInputPlugin;
@@ -29,13 +27,7 @@ import org.embulk.spi.util.InputStreamFileInput;
29
27
 
30
28
  import org.slf4j.Logger;
31
29
 
32
- import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
33
- import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
34
- import com.google.api.client.http.HttpTransport;
35
- import com.google.api.client.json.JsonFactory;
36
- import com.google.api.client.json.jackson2.JacksonFactory;
37
30
  import com.google.api.services.storage.Storage;
38
- import com.google.api.services.storage.StorageScopes;
39
31
  import com.google.api.services.storage.model.Bucket;
40
32
  import com.google.api.services.storage.model.Objects;
41
33
  import com.google.api.services.storage.model.StorageObject;
@@ -56,15 +48,21 @@ public class GcsFileInputPlugin
56
48
  @ConfigDefault("null")
57
49
  Optional<String> getLastPath();
58
50
 
51
+ @Config("auth_method")
52
+ @ConfigDefault("\"private_key\"")
53
+ AuthMethod getAuthMethod();
54
+
59
55
  @Config("service_account_email")
60
- String getServiceAccountEmail();
56
+ @ConfigDefault("null")
57
+ Optional<String> getServiceAccountEmail();
61
58
 
62
59
  @Config("application_name")
63
60
  @ConfigDefault("\"Embulk GCS input plugin\"")
64
61
  String getApplicationName();
65
62
 
66
63
  @Config("p12_keyfile_fullpath")
67
- String getP12KeyfileFullpath();
64
+ @ConfigDefault("null")
65
+ Optional<String> getP12KeyfileFullpath();
68
66
 
69
67
  List<String> getFiles();
70
68
  void setFiles(List<String> files);
@@ -74,8 +72,6 @@ public class GcsFileInputPlugin
74
72
  }
75
73
 
76
74
  private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
77
- private static HttpTransport httpTransport;
78
- private static JsonFactory jsonFactory;
79
75
 
80
76
  @Override
81
77
  public ConfigDiff transaction(ConfigSource config,
@@ -83,13 +79,6 @@ public class GcsFileInputPlugin
83
79
  {
84
80
  PluginTask task = config.loadConfig(PluginTask.class);
85
81
 
86
- try {
87
- httpTransport = GoogleNetHttpTransport.newTrustedTransport();
88
- jsonFactory = new JacksonFactory();
89
- } catch (Exception e) {
90
- log.warn("Could not generate http transport");
91
- }
92
-
93
82
  // list files recursively
94
83
  task.setFiles(listFiles(task));
95
84
  // number of processors is same with number of files
@@ -128,39 +117,14 @@ public class GcsFileInputPlugin
128
117
  {
129
118
  }
130
119
 
131
- /**
132
- * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
133
- */
134
- private static GoogleCredential getCredentialProvider (PluginTask task)
135
- {
120
+ private static Storage newGcsClient(final PluginTask task) {
121
+ Storage client = null;
136
122
  try {
137
- // @see https://cloud.google.com/compute/docs/api/how-tos/authorization
138
- // @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
139
- GoogleCredential cred = new GoogleCredential.Builder().setTransport(httpTransport)
140
- .setJsonFactory(jsonFactory)
141
- .setServiceAccountId(task.getServiceAccountEmail())
142
- .setServiceAccountScopes(
143
- ImmutableList.of(
144
- StorageScopes.DEVSTORAGE_READ_ONLY
145
- )
146
- )
147
- .setServiceAccountPrivateKeyFromP12File(new File(task.getP12KeyfileFullpath()))
148
- .build();
149
- return cred;
150
- } catch (IOException e) {
151
- log.warn(String.format("Could not load client secrets file %s", task.getP12KeyfileFullpath()));
152
- } catch (GeneralSecurityException e) {
153
- log.warn ("Google Authentication was failed");
123
+ GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
124
+ client = auth.getGcsClient(task.getBucket());
125
+ } catch (GeneralSecurityException | IOException ex) {
126
+ throw new ConfigException(ex);
154
127
  }
155
- return null;
156
- }
157
-
158
- private static Storage newGcsClient(PluginTask task)
159
- {
160
- GoogleCredential credentials = getCredentialProvider(task);
161
- Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
162
- .setApplicationName(task.getApplicationName())
163
- .build();
164
128
 
165
129
  return client;
166
130
  }
@@ -291,4 +255,22 @@ public class GcsFileInputPlugin
291
255
  @Override
292
256
  public void close() { }
293
257
  }
258
+
259
+ public enum AuthMethod
260
+ {
261
+ private_key("private_key"),
262
+ compute_engine("compute_engine");
263
+
264
+ private final String string;
265
+
266
+ AuthMethod(String string)
267
+ {
268
+ this.string = string;
269
+ }
270
+
271
+ public String getString()
272
+ {
273
+ return string;
274
+ }
275
+ }
294
276
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-19 00:00:00.000000000 Z
11
+ date: 2015-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: Reads files stored on Google Cloud Storage (Standard/Durable Reduced Availability/Nearline)
41
+ description: Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)
42
42
  email:
43
43
  - satoshiakama@gmail.com
44
44
  executables: []
@@ -54,11 +54,12 @@ files:
54
54
  - gradlew.bat
55
55
  - lib/embulk/input/gcs.rb
56
56
  - settings.gradle
57
+ - src/main/java/org/embulk/input/gcs/GcsAuthentication.java
57
58
  - src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
58
59
  - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
59
60
  - classpath/commons-codec-1.3.jar
60
61
  - classpath/commons-logging-1.1.1.jar
61
- - classpath/embulk-input-gcs-0.1.5.jar
62
+ - classpath/embulk-input-gcs-0.1.6.jar
62
63
  - classpath/google-api-client-1.19.1.jar
63
64
  - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
64
65
  - classpath/google-http-client-1.19.0.jar
Binary file