embulk-input-gcs 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa681f0822424f0c593fd882cb028f6d1ffd1084
4
- data.tar.gz: 1f715e779e7997e3ebb52e72c3775f12aeb1c949
3
+ metadata.gz: c1337868e0a547834610ae572d49228a3394aff7
4
+ data.tar.gz: 52e83dd8d9b91eb7ca715ef36b3d74f8241e2860
5
5
  SHA512:
6
- metadata.gz: 45ee5d75433ded0336ff43fba3c0cb2091ff63d67cf5ed6b53ce9dca642ae6663f5147e7bf3c395bd982e5bfc1f858f721c8df023ab90559dec88b710ce0478f
7
- data.tar.gz: 78cba836847ea4f5ece8978b90130bdcab2bd31fe3bd7542da9f19bddda85ba2a286a878ddc7b4dd8d96828333386c824be51f84fc6d1c4dc54aeb2391991826
6
+ metadata.gz: b8980edca632680de44f5311b3e2a3c622d94e3ee15f76eade05a536eb96cf64397e8916b4b47516a0724b9a2d2b03666e78ff2bc9bdee60352a6714e3d3383e
7
+ data.tar.gz: 8ad0624eb9576e37dbeac7dc2eead16bc72bfae8f51274d81042f0657821024ea2595ccd71f637fa9c63af049dac6bdd61fc9fe5df24ec8d1b36ee3bfff85a56
data/README.md CHANGED
@@ -15,13 +15,16 @@ embulk gem install embulk-input-gcs
15
15
  ```
16
16
 
17
17
  ### Google Service Account Settings
18
+
19
+ If you choose "private_key" as the [auth_method](#Authentication), you can obtain the service_account_email and the private key as follows.
20
+
18
21
  1. Make project at [Google Developers Console](https://console.developers.google.com/project).
19
22
 
20
23
  1. Make "Service Account" with [this step](https://cloud.google.com/storage/docs/authentication#service_accounts).
21
-
22
- Service Account has two specific scopes: read-only, read-write.
23
-
24
- embulk-input-gcs can run "read-only" scopes.
24
+
25
+ Service Account has two specific scopes: read-only, read-write.
26
+
27
+ embulk-input-gcs can run "read-only" scopes.
25
28
 
26
29
  1. Generate private key in P12(PKCS12) format, and upload to machine.
27
30
 
@@ -37,6 +40,7 @@ embulk run /path/to/config.yml
37
40
 
38
41
  - **bucket** Google Cloud Storage bucket name (string, required)
39
42
  - **path_prefix** prefix of target keys (string, required)
43
+ - **auth_method** authentication method to use (string, optional, "private_key" or "compute_engine"; default: "private_key")
40
44
  - **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is "private_key")
41
45
  - **p12_keyfile_fullpath** fullpath of p12 key (string, required when auth_method is "private_key")
42
46
  - **application_name** application name anything you like (string, optional)
@@ -48,6 +52,7 @@ in:
48
52
  type: gcs
49
53
  bucket: my-gcs-bucket
50
54
  path_prefix: logs/csv-
55
+ auth_method: private_key #default
51
56
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
52
57
  p12_keyfile_fullpath: /path/to/p12_keyfile.p12
53
58
  application_name: Anything you like
@@ -60,6 +65,7 @@ in:
60
65
  type: gcs
61
66
  bucket: my-gcs-bucket
62
67
  path_prefix: sample_
68
+ auth_method: private_key #default
63
69
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
64
70
  p12_keyfile_fullpath: /path/to/p12_keyfile.p12
65
71
  application_name: Anything you like
@@ -81,6 +87,29 @@ in:
81
87
  out: {type: stdout}
82
88
  ```
83
89
 
90
+ ## Authentication
91
+
92
+ Two methods are supported for fetching an access token for the service account.
93
+
94
+ 1. Public-Private key pair
95
+ 2. Pre-defined access token (Compute Engine only)
96
+
97
+ The examples above use the first one. You first need to create a service account (client ID),
98
+ download its private key and deploy the key with embulk.
99
+
100
+ On the other hand, you don't need to explicitly create a service account for embulk when you
101
+ run embulk in Google Compute Engine. In this second authentication method, you need to
102
+ add the API scope "https://www.googleapis.com/auth/devstorage.read_only" to the scope list of your
103
+ Compute Engine instance, then you can configure embulk like this.
104
+
105
+ [Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
106
+
107
+ ```yaml
108
+ in:
109
+ type: gcs
110
+ auth_method: compute_engine
111
+ ```
112
+
84
113
  ## Build
85
114
 
86
115
  ```
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  sourceCompatibility = 1.7
16
16
  targetCompatibility = 1.7
17
17
 
18
- version = "0.1.5"
18
+ version = "0.1.6"
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.7.1"
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.version = "${project.version}"
47
47
  spec.authors = ["Satoshi Akama"]
48
48
  spec.summary = %[Google Cloud Storage input plugin for Embulk]
49
- spec.description = %[Reads files stored on Google Cloud Storage (Standard/Durable Reduced Availability/Nearline)]
49
+ spec.description = %[Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)]
50
50
  spec.email = ["satoshiakama@gmail.com"]
51
51
  spec.licenses = ["Apache-2.0"]
52
52
  spec.homepage = "https://github.com/embulk/embulk-input-gcs"
@@ -0,0 +1,97 @@
1
+ package org.embulk.input.gcs;
2
+
3
+ import java.io.File;
4
+ import java.io.IOException;
5
+
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.collect.ImmutableList;
8
+ import java.security.GeneralSecurityException;
9
+
10
+ import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
11
+ import com.google.api.client.googleapis.compute.ComputeCredential;
12
+ import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
13
+ import com.google.api.client.http.HttpTransport;
14
+ import com.google.api.client.json.JsonFactory;
15
+ import com.google.api.client.json.jackson2.JacksonFactory;
16
+ import com.google.api.client.http.HttpRequestInitializer;
17
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
18
+ import com.google.api.services.storage.Storage;
19
+ import com.google.api.services.storage.StorageScopes;
20
+ import com.google.api.services.storage.model.Objects;
21
+ import org.embulk.spi.Exec;
22
+ import org.slf4j.Logger;
23
+
24
+ public class GcsAuthentication
25
+ {
26
+ private final Logger log = Exec.getLogger(GcsAuthentication.class);
27
+ private final Optional<String> serviceAccountEmail;
28
+ private final Optional<String> p12KeyFilePath;
29
+ private final String applicationName;
30
+ private final HttpTransport httpTransport;
31
+ private final JsonFactory jsonFactory;
32
+ private final HttpRequestInitializer credentials;
33
+
34
+ public GcsAuthentication(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
35
+ throws IOException, GeneralSecurityException
36
+ {
37
+ this.serviceAccountEmail = serviceAccountEmail;
38
+ this.p12KeyFilePath = p12KeyFilePath;
39
+ this.applicationName = applicationName;
40
+
41
+ this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
42
+ this.jsonFactory = new JacksonFactory();
43
+
44
+ if (authMethod.equals("compute_engine")) {
45
+ this.credentials = getComputeCredential();
46
+ } else {
47
+ this.credentials = getServiceAccountCredential();
48
+ }
49
+ }
50
+
51
+ /**
52
+ * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
53
+ */
54
+ private GoogleCredential getServiceAccountCredential() throws IOException, GeneralSecurityException
55
+ {
56
+ // @see https://cloud.google.com/compute/docs/api/how-tos/authorization
57
+ // @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
58
+ // @see https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/BigqueryScopes.html
59
+ return new GoogleCredential.Builder()
60
+ .setTransport(httpTransport)
61
+ .setJsonFactory(jsonFactory)
62
+ .setServiceAccountId(serviceAccountEmail.orNull())
63
+ .setServiceAccountScopes(
64
+ ImmutableList.of(
65
+ StorageScopes.DEVSTORAGE_READ_ONLY
66
+ )
67
+ )
68
+ .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
69
+ .build();
70
+ }
71
+
72
+ /**
73
+ * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#creatinganaccount
74
+ * @see https://developers.google.com/accounts/docs/OAuth2
75
+ */
76
+ private ComputeCredential getComputeCredential() throws IOException
77
+ {
78
+ ComputeCredential credential = new ComputeCredential.Builder(httpTransport, jsonFactory)
79
+ .build();
80
+ credential.refreshToken();
81
+
82
+ return credential;
83
+ }
84
+
85
+ public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
86
+ {
87
+ Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
88
+ .setApplicationName(applicationName)
89
+ .build();
90
+
91
+ // Issue a minimal list request so that an IOException is thrown here if authentication fails.
92
+ long maxResults = 1;
93
+ Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
94
+
95
+ return client;
96
+ }
97
+ }
@@ -1,10 +1,8 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
3
  import java.util.List;
4
- import java.util.Arrays;
5
4
  import java.util.ArrayList;
6
5
  import java.util.Collections;
7
- import java.io.File;
8
6
  import java.io.IOException;
9
7
  import java.io.InputStream;
10
8
  import java.math.BigInteger;
@@ -17,10 +15,10 @@ import org.embulk.config.Config;
17
15
  import org.embulk.config.ConfigInject;
18
16
  import org.embulk.config.ConfigDiff;
19
17
  import org.embulk.config.ConfigDefault;
18
+ import org.embulk.config.ConfigException;
20
19
  import org.embulk.config.ConfigSource;
21
20
  import org.embulk.config.Task;
22
21
  import org.embulk.config.TaskSource;
23
- import org.embulk.config.TaskReport;
24
22
  import org.embulk.spi.Exec;
25
23
  import org.embulk.spi.BufferAllocator;
26
24
  import org.embulk.spi.FileInputPlugin;
@@ -29,13 +27,7 @@ import org.embulk.spi.util.InputStreamFileInput;
29
27
 
30
28
  import org.slf4j.Logger;
31
29
 
32
- import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
33
- import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
34
- import com.google.api.client.http.HttpTransport;
35
- import com.google.api.client.json.JsonFactory;
36
- import com.google.api.client.json.jackson2.JacksonFactory;
37
30
  import com.google.api.services.storage.Storage;
38
- import com.google.api.services.storage.StorageScopes;
39
31
  import com.google.api.services.storage.model.Bucket;
40
32
  import com.google.api.services.storage.model.Objects;
41
33
  import com.google.api.services.storage.model.StorageObject;
@@ -56,15 +48,21 @@ public class GcsFileInputPlugin
56
48
  @ConfigDefault("null")
57
49
  Optional<String> getLastPath();
58
50
 
51
+ @Config("auth_method")
52
+ @ConfigDefault("\"private_key\"")
53
+ AuthMethod getAuthMethod();
54
+
59
55
  @Config("service_account_email")
60
- String getServiceAccountEmail();
56
+ @ConfigDefault("null")
57
+ Optional<String> getServiceAccountEmail();
61
58
 
62
59
  @Config("application_name")
63
60
  @ConfigDefault("\"Embulk GCS input plugin\"")
64
61
  String getApplicationName();
65
62
 
66
63
  @Config("p12_keyfile_fullpath")
67
- String getP12KeyfileFullpath();
64
+ @ConfigDefault("null")
65
+ Optional<String> getP12KeyfileFullpath();
68
66
 
69
67
  List<String> getFiles();
70
68
  void setFiles(List<String> files);
@@ -74,8 +72,6 @@ public class GcsFileInputPlugin
74
72
  }
75
73
 
76
74
  private static final Logger log = Exec.getLogger(GcsFileInputPlugin.class);
77
- private static HttpTransport httpTransport;
78
- private static JsonFactory jsonFactory;
79
75
 
80
76
  @Override
81
77
  public ConfigDiff transaction(ConfigSource config,
@@ -83,13 +79,6 @@ public class GcsFileInputPlugin
83
79
  {
84
80
  PluginTask task = config.loadConfig(PluginTask.class);
85
81
 
86
- try {
87
- httpTransport = GoogleNetHttpTransport.newTrustedTransport();
88
- jsonFactory = new JacksonFactory();
89
- } catch (Exception e) {
90
- log.warn("Could not generate http transport");
91
- }
92
-
93
82
  // list files recursively
94
83
  task.setFiles(listFiles(task));
95
84
  // number of processors is same with number of files
@@ -128,39 +117,14 @@ public class GcsFileInputPlugin
128
117
  {
129
118
  }
130
119
 
131
- /**
132
- * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
133
- */
134
- private static GoogleCredential getCredentialProvider (PluginTask task)
135
- {
120
+ private static Storage newGcsClient(final PluginTask task) {
121
+ Storage client = null;
136
122
  try {
137
- // @see https://cloud.google.com/compute/docs/api/how-tos/authorization
138
- // @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
139
- GoogleCredential cred = new GoogleCredential.Builder().setTransport(httpTransport)
140
- .setJsonFactory(jsonFactory)
141
- .setServiceAccountId(task.getServiceAccountEmail())
142
- .setServiceAccountScopes(
143
- ImmutableList.of(
144
- StorageScopes.DEVSTORAGE_READ_ONLY
145
- )
146
- )
147
- .setServiceAccountPrivateKeyFromP12File(new File(task.getP12KeyfileFullpath()))
148
- .build();
149
- return cred;
150
- } catch (IOException e) {
151
- log.warn(String.format("Could not load client secrets file %s", task.getP12KeyfileFullpath()));
152
- } catch (GeneralSecurityException e) {
153
- log.warn ("Google Authentication was failed");
123
+ GcsAuthentication auth = new GcsAuthentication(task.getAuthMethod().getString(), task.getServiceAccountEmail(), task.getP12KeyfileFullpath(), task.getApplicationName());
124
+ client = auth.getGcsClient(task.getBucket());
125
+ } catch (GeneralSecurityException | IOException ex) {
126
+ throw new ConfigException(ex);
154
127
  }
155
- return null;
156
- }
157
-
158
- private static Storage newGcsClient(PluginTask task)
159
- {
160
- GoogleCredential credentials = getCredentialProvider(task);
161
- Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
162
- .setApplicationName(task.getApplicationName())
163
- .build();
164
128
 
165
129
  return client;
166
130
  }
@@ -291,4 +255,22 @@ public class GcsFileInputPlugin
291
255
  @Override
292
256
  public void close() { }
293
257
  }
258
+
259
+ public enum AuthMethod
260
+ {
261
+ private_key("private_key"),
262
+ compute_engine("compute_engine");
263
+
264
+ private final String string;
265
+
266
+ AuthMethod(String string)
267
+ {
268
+ this.string = string;
269
+ }
270
+
271
+ public String getString()
272
+ {
273
+ return string;
274
+ }
275
+ }
294
276
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-19 00:00:00.000000000 Z
11
+ date: 2015-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: Reads files stored on Google Cloud Storage (Standard/Durable Reduced Availability/Nearline)
41
+ description: Reads files stored on Google Cloud Storage (Standard, Durable Reduced Availability or Nearline)
42
42
  email:
43
43
  - satoshiakama@gmail.com
44
44
  executables: []
@@ -54,11 +54,12 @@ files:
54
54
  - gradlew.bat
55
55
  - lib/embulk/input/gcs.rb
56
56
  - settings.gradle
57
+ - src/main/java/org/embulk/input/gcs/GcsAuthentication.java
57
58
  - src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
58
59
  - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
59
60
  - classpath/commons-codec-1.3.jar
60
61
  - classpath/commons-logging-1.1.1.jar
61
- - classpath/embulk-input-gcs-0.1.5.jar
62
+ - classpath/embulk-input-gcs-0.1.6.jar
62
63
  - classpath/google-api-client-1.19.1.jar
63
64
  - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
64
65
  - classpath/google-http-client-1.19.0.jar
Binary file