embulk-input-gcs 0.1.13 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +3 -0
- data/README.md +10 -2
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +12 -4
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +33 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
|
4
|
+
data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
|
7
|
+
data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
|
data/ChangeLog
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
Release 0.2.0 - 2016-06-03
|
2
|
+
* Support `paths` option to allow specifying a list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
|
3
|
+
|
1
4
|
Release 0.1.13 - 2016-02-04
|
2
5
|
* Upgraded embulk to v0.8.2
|
3
6
|
* Updated Google HTTP Client Library from 1.19.0 to 1.21.0
|
data/README.md
CHANGED
@@ -37,7 +37,8 @@ embulk run /path/to/config.yml
|
|
37
37
|
## Configuration
|
38
38
|
|
39
39
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
40
|
-
- **path_prefix** prefix of target keys (string, required)
|
40
|
+
- **path_prefix** prefix of target keys (string, either "path_prefix" or "paths" is required)
|
41
|
+
- **paths** list of target keys (array of strings, either "path_prefix" or "paths" is required)
|
41
42
|
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
42
43
|
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
43
44
|
- **p12_keyfile** fullpath of p12 key (string, required when auth_method is private_key)
|
@@ -147,6 +148,13 @@ in:
|
|
147
148
|
auth_method: compute_engine
|
148
149
|
```
|
149
150
|
|
151
|
+
## Eventual Consistency
|
152
|
+
|
153
|
+
Listing objects is eventually consistent, whereas getting an object is strongly consistent; see https://cloud.google.com/storage/docs/consistency.
|
154
|
+
|
155
|
+
`path_prefix` uses the objects list API, so it may miss some objects.
|
156
|
+
If you want to avoid such situations, use the `paths` option, which specifies object paths directly without using the objects list API.
|
157
|
+
|
150
158
|
## Build
|
151
159
|
|
152
160
|
```
|
@@ -201,4 +209,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
|
|
201
209
|
$ launchctl getenv GCP_EMAIL //try to get value.
|
202
210
|
|
203
211
|
Then start your applications.
|
204
|
-
```
|
212
|
+
```
|
data/build.gradle
CHANGED
@@ -52,7 +52,8 @@ public class GcsFileInputPlugin
|
|
52
52
|
String getBucket();
|
53
53
|
|
54
54
|
@Config("path_prefix")
|
55
|
-
|
55
|
+
@ConfigDefault("null")
|
56
|
+
Optional<String> getPathPrefix();
|
56
57
|
|
57
58
|
@Config("last_path")
|
58
59
|
@ConfigDefault("null")
|
@@ -84,6 +85,8 @@ public class GcsFileInputPlugin
|
|
84
85
|
@ConfigDefault("null")
|
85
86
|
Optional<LocalFile> getJsonKeyfile();
|
86
87
|
|
88
|
+
@Config("paths")
|
89
|
+
@ConfigDefault("[]")
|
87
90
|
List<String> getFiles();
|
88
91
|
void setFiles(List<String> files);
|
89
92
|
|
@@ -131,8 +134,13 @@ public class GcsFileInputPlugin
|
|
131
134
|
|
132
135
|
Storage client = newGcsClient(task, newGcsAuth(task));
|
133
136
|
|
134
|
-
// list files recursively
|
135
|
-
task.
|
137
|
+
// list files recursively if path_prefix is specified
|
138
|
+
if (task.getPathPrefix().isPresent()) {
|
139
|
+
task.setFiles(listFiles(task, client));
|
140
|
+
}
|
141
|
+
if (task.getFiles().isEmpty()) {
|
142
|
+
throw new ConfigException("No file is found. Fix path_prefix or specify paths directly");
|
143
|
+
}
|
136
144
|
// number of processors is same with number of files
|
137
145
|
return resume(task.dump(), task.getFiles().size(), control);
|
138
146
|
}
|
@@ -214,7 +222,7 @@ public class GcsFileInputPlugin
|
|
214
222
|
{
|
215
223
|
String bucket = task.getBucket();
|
216
224
|
|
217
|
-
return listGcsFilesByPrefix(client, bucket, task.getPathPrefix(), task.getLastPath());
|
225
|
+
return listGcsFilesByPrefix(client, bucket, task.getPathPrefix().get(), task.getLastPath());
|
218
226
|
}
|
219
227
|
|
220
228
|
/**
|
@@ -36,6 +36,7 @@ import java.util.Arrays;
|
|
36
36
|
import java.util.List;
|
37
37
|
|
38
38
|
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertFalse;
|
39
40
|
import static org.junit.Assume.assumeNotNull;
|
40
41
|
|
41
42
|
import java.lang.reflect.InvocationTargetException;
|
@@ -107,6 +108,38 @@ public class TestGcsFileInputPlugin
|
|
107
108
|
assertEquals("Embulk GCS input plugin", task.getApplicationName());
|
108
109
|
}
|
109
110
|
|
111
|
+
// paths are set
|
112
|
+
@Test
|
113
|
+
public void checkDefaultValuesPathsSpecified()
|
114
|
+
{
|
115
|
+
ConfigSource config = Exec.newConfigSource()
|
116
|
+
.set("bucket", GCP_BUCKET)
|
117
|
+
.set("paths", Arrays.asList("object1", "object2"))
|
118
|
+
.set("auth_method", "private_key")
|
119
|
+
.set("service_account_email", GCP_EMAIL)
|
120
|
+
.set("p12_keyfile", GCP_P12_KEYFILE)
|
121
|
+
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
122
|
+
.set("parser", parserConfig(schemaConfig()));
|
123
|
+
|
124
|
+
GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
|
125
|
+
assertFalse(task.getFiles().isEmpty());
|
126
|
+
}
|
127
|
+
|
128
|
+
// both path_prefix and paths are not set
|
129
|
+
@Test(expected = ConfigException.class)
|
130
|
+
public void checkDefaultValuesNoPathSpecified()
|
131
|
+
{
|
132
|
+
ConfigSource config = Exec.newConfigSource()
|
133
|
+
.set("bucket", GCP_BUCKET)
|
134
|
+
.set("auth_method", "private_key")
|
135
|
+
.set("service_account_email", GCP_EMAIL)
|
136
|
+
.set("p12_keyfile", GCP_P12_KEYFILE)
|
137
|
+
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
138
|
+
.set("parser", parserConfig(schemaConfig()));
|
139
|
+
|
140
|
+
runner.transaction(config, new Control());
|
141
|
+
}
|
142
|
+
|
110
143
|
// p12_keyfile is null when auth_method is private_key
|
111
144
|
@Test(expected = ConfigException.class)
|
112
145
|
public void checkDefaultValuesP12keyNull()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02
|
11
|
+
date: 2016-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/resources/sample_02.csv
|
66
66
|
- classpath/commons-codec-1.3.jar
|
67
67
|
- classpath/commons-logging-1.1.1.jar
|
68
|
-
- classpath/embulk-input-gcs-0.
|
68
|
+
- classpath/embulk-input-gcs-0.2.0.jar
|
69
69
|
- classpath/google-api-client-1.21.0.jar
|
70
70
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
71
71
|
- classpath/google-http-client-1.21.0.jar
|