embulk-input-gcs 0.1.13 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e7dc0a163123cb7bb2de1738c46d60b70282d12
4
- data.tar.gz: 10370c8b104b51f6e5ed55297181647a9caa1d82
3
+ metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
4
+ data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
5
5
  SHA512:
6
- metadata.gz: 107ca80b29d9f3a0ea45a5d62326c570e8df3c2efea0a979f51cbcca174e8d67dd8d97183ad7856362699fbd693363ba96e10f9a0513122019a76d6e75dffeab
7
- data.tar.gz: 39bb76c59dd27373c31280ad20b614d4fa6aab4655eacf9631b29a160838e545d0ec04d3ae7ea140c3897a6020c60f3e4623846bce43edf133708330b4ecd6b4
6
+ metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
7
+ data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
data/ChangeLog CHANGED
@@ -1,3 +1,6 @@
1
+ Release 0.2.0 - 2016-06-03
2
+ * Support `paths` option to specify a list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
3
+
1
4
  Release 0.1.13 - 2016-02-04
2
5
  * Upgraded embulk to v0.8.2
3
6
  * Updated Google HTTP Client Library from 1.19.0 to 1.21.0
data/README.md CHANGED
@@ -37,7 +37,8 @@ embulk run /path/to/config.yml
37
37
  ## Configuration
38
38
 
39
39
  - **bucket** Google Cloud Storage bucket name (string, required)
40
- - **path_prefix** prefix of target keys (string, required)
40
+ - **path_prefix** prefix of target keys (string, either "path_prefix" or "paths" is required)
41
+ - **paths** list of target keys (array of strings, either "path_prefix" or "paths" is required)
41
42
  - **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
42
43
  - **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
43
44
  - **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
@@ -147,6 +148,13 @@ in:
147
148
  auth_method: compute_engine
148
149
  ```
149
150
 
151
+ ## Eventual Consistency
152
+
153
+ Listing objects is eventually consistent, whereas getting an object is strongly consistent; see https://cloud.google.com/storage/docs/consistency.
154
+
155
+ `path_prefix` uses the objects list API, so it may miss some objects.
156
+ To avoid this, use the `paths` option, which specifies object paths directly and does not go through the objects list API.
157
+
150
158
  ## Build
151
159
 
152
160
  ```
@@ -201,4 +209,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
201
209
  $ launchctl getenv GCP_EMAIL //try to get value.
202
210
 
203
211
  Then start your applications.
204
- ```
212
+ ```
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.1.13"
20
+ version = "0.2.0"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -52,7 +52,8 @@ public class GcsFileInputPlugin
52
52
  String getBucket();
53
53
 
54
54
  @Config("path_prefix")
55
- String getPathPrefix();
55
+ @ConfigDefault("null")
56
+ Optional<String> getPathPrefix();
56
57
 
57
58
  @Config("last_path")
58
59
  @ConfigDefault("null")
@@ -84,6 +85,8 @@ public class GcsFileInputPlugin
84
85
  @ConfigDefault("null")
85
86
  Optional<LocalFile> getJsonKeyfile();
86
87
 
88
+ @Config("paths")
89
+ @ConfigDefault("[]")
87
90
  List<String> getFiles();
88
91
  void setFiles(List<String> files);
89
92
 
@@ -131,8 +134,13 @@ public class GcsFileInputPlugin
131
134
 
132
135
  Storage client = newGcsClient(task, newGcsAuth(task));
133
136
 
134
- // list files recursively
135
- task.setFiles(listFiles(task, client));
137
+ // list files recursively if path_prefix is specified
138
+ if (task.getPathPrefix().isPresent()) {
139
+ task.setFiles(listFiles(task, client));
140
+ }
141
+ if (task.getFiles().isEmpty()) {
142
+ throw new ConfigException("No file is found. Fix path_prefix or specify paths directly");
143
+ }
136
144
  // number of processors is same with number of files
137
145
  return resume(task.dump(), task.getFiles().size(), control);
138
146
  }
@@ -214,7 +222,7 @@ public class GcsFileInputPlugin
214
222
  {
215
223
  String bucket = task.getBucket();
216
224
 
217
- return listGcsFilesByPrefix(client, bucket, task.getPathPrefix(), task.getLastPath());
225
+ return listGcsFilesByPrefix(client, bucket, task.getPathPrefix().get(), task.getLastPath());
218
226
  }
219
227
 
220
228
  /**
@@ -36,6 +36,7 @@ import java.util.Arrays;
36
36
  import java.util.List;
37
37
 
38
38
  import static org.junit.Assert.assertEquals;
39
+ import static org.junit.Assert.assertFalse;
39
40
  import static org.junit.Assume.assumeNotNull;
40
41
 
41
42
  import java.lang.reflect.InvocationTargetException;
@@ -107,6 +108,38 @@ public class TestGcsFileInputPlugin
107
108
  assertEquals("Embulk GCS input plugin", task.getApplicationName());
108
109
  }
109
110
 
111
+ // paths are set
112
+ @Test
113
+ public void checkDefaultValuesPathsSpecified()
114
+ {
115
+ ConfigSource config = Exec.newConfigSource()
116
+ .set("bucket", GCP_BUCKET)
117
+ .set("paths", Arrays.asList("object1", "object2"))
118
+ .set("auth_method", "private_key")
119
+ .set("service_account_email", GCP_EMAIL)
120
+ .set("p12_keyfile", GCP_P12_KEYFILE)
121
+ .set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
122
+ .set("parser", parserConfig(schemaConfig()));
123
+
124
+ GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
125
+ assertFalse(task.getFiles().isEmpty());
126
+ }
127
+
128
+ // neither path_prefix nor paths is set
129
+ @Test(expected = ConfigException.class)
130
+ public void checkDefaultValuesNoPathSpecified()
131
+ {
132
+ ConfigSource config = Exec.newConfigSource()
133
+ .set("bucket", GCP_BUCKET)
134
+ .set("auth_method", "private_key")
135
+ .set("service_account_email", GCP_EMAIL)
136
+ .set("p12_keyfile", GCP_P12_KEYFILE)
137
+ .set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
138
+ .set("parser", parserConfig(schemaConfig()));
139
+
140
+ runner.transaction(config, new Control());
141
+ }
142
+
110
143
  // p12_keyfile is null when auth_method is private_key
111
144
  @Test(expected = ConfigException.class)
112
145
  public void checkDefaultValuesP12keyNull()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-04 00:00:00.000000000 Z
11
+ date: 2016-06-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/resources/sample_02.csv
66
66
  - classpath/commons-codec-1.3.jar
67
67
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-input-gcs-0.1.13.jar
68
+ - classpath/embulk-input-gcs-0.2.0.jar
69
69
  - classpath/google-api-client-1.21.0.jar
70
70
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
71
71
  - classpath/google-http-client-1.21.0.jar