embulk-input-gcs 0.1.13 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +3 -0
- data/README.md +10 -2
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +12 -4
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +33 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
|
4
|
+
data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
|
7
|
+
data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
|
data/ChangeLog
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
Release 0.2.0 - 2016-06-03
|
2
|
+
* Support `paths` option to specify the list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
|
3
|
+
|
1
4
|
Release 0.1.13 - 2016-02-04
|
2
5
|
* Upgraded embulk to v0.8.2
|
3
6
|
* Updated Google HTTP Client Library from 1.19.0 to 1.21.0
|
data/README.md
CHANGED
@@ -37,7 +37,8 @@ embulk run /path/to/config.yml
|
|
37
37
|
## Configuration
|
38
38
|
|
39
39
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
40
|
-
- **path_prefix** prefix of target keys (string, required)
|
40
|
+
- **path_prefix** prefix of target keys (string, either of "path_prefix" or "paths" is required)
|
41
|
+
- **paths** list of target keys (array of string, either of "path_prefix" or "paths" is required)
|
41
42
|
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
42
43
|
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
43
44
|
- **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
|
@@ -147,6 +148,13 @@ in:
|
|
147
148
|
auth_method: compute_engine
|
148
149
|
```
|
149
150
|
|
151
|
+
## Eventual Consistency
|
152
|
+
|
153
|
+
An operation listing objects is eventually consistent although getting objects is strongly consistent, see https://cloud.google.com/storage/docs/consistency.
|
154
|
+
|
155
|
+
`path_prefix` uses the objects list API, therefore it may miss some objects.
|
156
|
+
If you want to avoid such situations, you should use `paths` option which directly specifies object paths without the objects list API.
|
157
|
+
|
150
158
|
## Build
|
151
159
|
|
152
160
|
```
|
@@ -201,4 +209,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
|
|
201
209
|
$ launchctl getenv GCP_EMAIL //try to get value.
|
202
210
|
|
203
211
|
Then start your applications.
|
204
|
-
```
|
212
|
+
```
|
data/build.gradle
CHANGED
@@ -52,7 +52,8 @@ public class GcsFileInputPlugin
|
|
52
52
|
String getBucket();
|
53
53
|
|
54
54
|
@Config("path_prefix")
|
55
|
-
|
55
|
+
@ConfigDefault("null")
|
56
|
+
Optional<String> getPathPrefix();
|
56
57
|
|
57
58
|
@Config("last_path")
|
58
59
|
@ConfigDefault("null")
|
@@ -84,6 +85,8 @@ public class GcsFileInputPlugin
|
|
84
85
|
@ConfigDefault("null")
|
85
86
|
Optional<LocalFile> getJsonKeyfile();
|
86
87
|
|
88
|
+
@Config("paths")
|
89
|
+
@ConfigDefault("[]")
|
87
90
|
List<String> getFiles();
|
88
91
|
void setFiles(List<String> files);
|
89
92
|
|
@@ -131,8 +134,13 @@ public class GcsFileInputPlugin
|
|
131
134
|
|
132
135
|
Storage client = newGcsClient(task, newGcsAuth(task));
|
133
136
|
|
134
|
-
// list files recursively
|
135
|
-
task.setFiles(listFiles(task, client));
|
137
|
+
// list files recursively if path_prefix is specified
|
138
|
+
if (task.getPathPrefix().isPresent()) {
|
139
|
+
task.setFiles(listFiles(task, client));
|
140
|
+
}
|
141
|
+
if (task.getFiles().isEmpty()) {
|
142
|
+
throw new ConfigException("No file is found. Fix path_prefix or specify paths directly");
|
143
|
+
}
|
136
144
|
// number of processors is same with number of files
|
137
145
|
return resume(task.dump(), task.getFiles().size(), control);
|
138
146
|
}
|
@@ -214,7 +222,7 @@ public class GcsFileInputPlugin
|
|
214
222
|
{
|
215
223
|
String bucket = task.getBucket();
|
216
224
|
|
217
|
-
return listGcsFilesByPrefix(client, bucket, task.getPathPrefix(), task.getLastPath());
|
225
|
+
return listGcsFilesByPrefix(client, bucket, task.getPathPrefix().get(), task.getLastPath());
|
218
226
|
}
|
219
227
|
|
220
228
|
/**
|
@@ -36,6 +36,7 @@ import java.util.Arrays;
|
|
36
36
|
import java.util.List;
|
37
37
|
|
38
38
|
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertFalse;
|
39
40
|
import static org.junit.Assume.assumeNotNull;
|
40
41
|
|
41
42
|
import java.lang.reflect.InvocationTargetException;
|
@@ -107,6 +108,38 @@ public class TestGcsFileInputPlugin
|
|
107
108
|
assertEquals("Embulk GCS input plugin", task.getApplicationName());
|
108
109
|
}
|
109
110
|
|
111
|
+
// paths are set
|
112
|
+
@Test
|
113
|
+
public void checkDefaultValuesPathsSpecified()
|
114
|
+
{
|
115
|
+
ConfigSource config = Exec.newConfigSource()
|
116
|
+
.set("bucket", GCP_BUCKET)
|
117
|
+
.set("paths", Arrays.asList("object1", "object2"))
|
118
|
+
.set("auth_method", "private_key")
|
119
|
+
.set("service_account_email", GCP_EMAIL)
|
120
|
+
.set("p12_keyfile", GCP_P12_KEYFILE)
|
121
|
+
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
122
|
+
.set("parser", parserConfig(schemaConfig()));
|
123
|
+
|
124
|
+
GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
|
125
|
+
assertFalse(task.getFiles().isEmpty());
|
126
|
+
}
|
127
|
+
|
128
|
+
// both path_prefix and paths are not set
|
129
|
+
@Test(expected = ConfigException.class)
|
130
|
+
public void checkDefaultValuesNoPathSpecified()
|
131
|
+
{
|
132
|
+
ConfigSource config = Exec.newConfigSource()
|
133
|
+
.set("bucket", GCP_BUCKET)
|
134
|
+
.set("auth_method", "private_key")
|
135
|
+
.set("service_account_email", GCP_EMAIL)
|
136
|
+
.set("p12_keyfile", GCP_P12_KEYFILE)
|
137
|
+
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
138
|
+
.set("parser", parserConfig(schemaConfig()));
|
139
|
+
|
140
|
+
runner.transaction(config, new Control());
|
141
|
+
}
|
142
|
+
|
110
143
|
// p12_keyfile is null when auth_method is private_key
|
111
144
|
@Test(expected = ConfigException.class)
|
112
145
|
public void checkDefaultValuesP12keyNull()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02
|
11
|
+
date: 2016-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/resources/sample_02.csv
|
66
66
|
- classpath/commons-codec-1.3.jar
|
67
67
|
- classpath/commons-logging-1.1.1.jar
|
68
|
-
- classpath/embulk-input-gcs-0.1.13.jar
|
68
|
+
- classpath/embulk-input-gcs-0.2.0.jar
|
69
69
|
- classpath/google-api-client-1.21.0.jar
|
70
70
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
71
71
|
- classpath/google-http-client-1.21.0.jar
|