embulk-input-gcs 0.1.13 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e7dc0a163123cb7bb2de1738c46d60b70282d12
4
- data.tar.gz: 10370c8b104b51f6e5ed55297181647a9caa1d82
3
+ metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
4
+ data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
5
5
  SHA512:
6
- metadata.gz: 107ca80b29d9f3a0ea45a5d62326c570e8df3c2efea0a979f51cbcca174e8d67dd8d97183ad7856362699fbd693363ba96e10f9a0513122019a76d6e75dffeab
7
- data.tar.gz: 39bb76c59dd27373c31280ad20b614d4fa6aab4655eacf9631b29a160838e545d0ec04d3ae7ea140c3897a6020c60f3e4623846bce43edf133708330b4ecd6b4
6
+ metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
7
+ data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
data/ChangeLog CHANGED
@@ -1,3 +1,6 @@
1
+ Release 0.2.0 - 2016-06-03
2
+ * Support `paths` option to specify a list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
3
+
1
4
  Release 0.1.13 - 2016-02-04
2
5
  * Upgraded embulk to v0.8.2
3
6
  * Updated Google HTTP Client Library from 1.19.0 to 1.21.0
data/README.md CHANGED
@@ -37,7 +37,8 @@ embulk run /path/to/config.yml
37
37
  ## Configuration
38
38
 
39
39
  - **bucket** Google Cloud Storage bucket name (string, required)
40
- - **path_prefix** prefix of target keys (string, required)
40
+ - **path_prefix** prefix of target keys (string, either "path_prefix" or "paths" is required)
41
+ - **paths** list of target keys (array of strings, either "path_prefix" or "paths" is required)
41
42
  - **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
42
43
  - **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
43
44
  - **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
@@ -147,6 +148,13 @@ in:
147
148
  auth_method: compute_engine
148
149
  ```
149
150
 
151
+ ## Eventual Consistency
152
+
153
+ Listing objects is eventually consistent, whereas getting an object is strongly consistent; see https://cloud.google.com/storage/docs/consistency.
154
+
155
+ `path_prefix` uses the objects list API, so it may miss some objects.
156
+ To avoid this, use the `paths` option, which specifies object paths directly and does not call the objects list API.
157
+
150
158
  ## Build
151
159
 
152
160
  ```
@@ -201,4 +209,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
201
209
  $ launchctl getenv GCP_EMAIL //try to get value.
202
210
 
203
211
  Then start your applications.
204
- ```
212
+ ```
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.1.13"
20
+ version = "0.2.0"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -52,7 +52,8 @@ public class GcsFileInputPlugin
52
52
  String getBucket();
53
53
 
54
54
  @Config("path_prefix")
55
- String getPathPrefix();
55
+ @ConfigDefault("null")
56
+ Optional<String> getPathPrefix();
56
57
 
57
58
  @Config("last_path")
58
59
  @ConfigDefault("null")
@@ -84,6 +85,8 @@ public class GcsFileInputPlugin
84
85
  @ConfigDefault("null")
85
86
  Optional<LocalFile> getJsonKeyfile();
86
87
 
88
+ @Config("paths")
89
+ @ConfigDefault("[]")
87
90
  List<String> getFiles();
88
91
  void setFiles(List<String> files);
89
92
 
@@ -131,8 +134,13 @@ public class GcsFileInputPlugin
131
134
 
132
135
  Storage client = newGcsClient(task, newGcsAuth(task));
133
136
 
134
- // list files recursively
135
- task.setFiles(listFiles(task, client));
137
+ // list files recursively if path_prefix is specified
138
+ if (task.getPathPrefix().isPresent()) {
139
+ task.setFiles(listFiles(task, client));
140
+ }
141
+ if (task.getFiles().isEmpty()) {
142
+ throw new ConfigException("No file is found. Fix path_prefix or specify paths directly");
143
+ }
136
144
  // number of processors is same with number of files
137
145
  return resume(task.dump(), task.getFiles().size(), control);
138
146
  }
@@ -214,7 +222,7 @@ public class GcsFileInputPlugin
214
222
  {
215
223
  String bucket = task.getBucket();
216
224
 
217
- return listGcsFilesByPrefix(client, bucket, task.getPathPrefix(), task.getLastPath());
225
+ return listGcsFilesByPrefix(client, bucket, task.getPathPrefix().get(), task.getLastPath());
218
226
  }
219
227
 
220
228
  /**
@@ -36,6 +36,7 @@ import java.util.Arrays;
36
36
  import java.util.List;
37
37
 
38
38
  import static org.junit.Assert.assertEquals;
39
+ import static org.junit.Assert.assertFalse;
39
40
  import static org.junit.Assume.assumeNotNull;
40
41
 
41
42
  import java.lang.reflect.InvocationTargetException;
@@ -107,6 +108,38 @@ public class TestGcsFileInputPlugin
107
108
  assertEquals("Embulk GCS input plugin", task.getApplicationName());
108
109
  }
109
110
 
111
+ // paths are set
112
+ @Test
113
+ public void checkDefaultValuesPathsSpecified()
114
+ {
115
+ ConfigSource config = Exec.newConfigSource()
116
+ .set("bucket", GCP_BUCKET)
117
+ .set("paths", Arrays.asList("object1", "object2"))
118
+ .set("auth_method", "private_key")
119
+ .set("service_account_email", GCP_EMAIL)
120
+ .set("p12_keyfile", GCP_P12_KEYFILE)
121
+ .set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
122
+ .set("parser", parserConfig(schemaConfig()));
123
+
124
+ GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
125
+ assertFalse(task.getFiles().isEmpty());
126
+ }
127
+
128
+ // both path_prefix and paths are not set
129
+ @Test(expected = ConfigException.class)
130
+ public void checkDefaultValuesNoPathSpecified()
131
+ {
132
+ ConfigSource config = Exec.newConfigSource()
133
+ .set("bucket", GCP_BUCKET)
134
+ .set("auth_method", "private_key")
135
+ .set("service_account_email", GCP_EMAIL)
136
+ .set("p12_keyfile", GCP_P12_KEYFILE)
137
+ .set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
138
+ .set("parser", parserConfig(schemaConfig()));
139
+
140
+ runner.transaction(config, new Control());
141
+ }
142
+
110
143
  // p12_keyfile is null when auth_method is private_key
111
144
  @Test(expected = ConfigException.class)
112
145
  public void checkDefaultValuesP12keyNull()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-04 00:00:00.000000000 Z
11
+ date: 2016-06-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/resources/sample_02.csv
66
66
  - classpath/commons-codec-1.3.jar
67
67
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-input-gcs-0.1.13.jar
68
+ - classpath/embulk-input-gcs-0.2.0.jar
69
69
  - classpath/google-api-client-1.21.0.jar
70
70
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
71
71
  - classpath/google-http-client-1.21.0.jar