embulk-input-gcs 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/CHANGELOG.md +3 -0
- data/README.md +16 -0
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.2.6.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/input/gcs/FileList.java +335 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInput.java +195 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +10 -362
- data/src/main/java/org/embulk/input/gcs/PluginTask.java +71 -0
- data/src/main/java/org/embulk/input/gcs/SingleFileProvider.java +143 -0
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +54 -21
- metadata +7 -3
- data/classpath/embulk-input-gcs-0.2.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04cb5b37d8fb8c70e1c9c4c306cf792ac1ad1ec9
|
4
|
+
data.tar.gz: 44ec9518fc188320a19ffbe2ce7db0b07cda30f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d2a59336002f07d48bcf8b5b22b3626e9c59df7aad93a4f91721667f6f7e1552bd1db72ebd1323e19ce1dc67f32da64b9ad88bc6d6785591904b8d87059249f
|
7
|
+
data.tar.gz: 1533eccae86b7355303ab91bc5ab08cf49c816438fef3541ad1318fc24f0eab948667132952746173a2dd3dabea41bc06cfc2493b8b4701c1908d2e07ff9b635
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.2.6 - 2018-03-05
|
2
|
+
* [maintenance] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
|
3
|
+
|
1
4
|
## 0.2.5 - 2017-05-19
|
2
5
|
* [maintenance] Fix InputStream handling so that the plugin no longer reads fewer records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
|
3
6
|
|
data/README.md
CHANGED
@@ -40,6 +40,7 @@ embulk run /path/to/config.yml
|
|
40
40
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
41
41
|
- **path_prefix** prefix of target keys (string, either of "path_prefix" or "paths" is required)
|
42
42
|
- **paths** list of target keys (array of string, either of "path_prefix" or "paths" is required)
|
43
|
+
- **path_match_pattern**: regexp to match file paths. If a file path doesn't match this pattern, the file will be skipped (regexp string, optional)
|
43
44
|
- **incremental**: enables incremental loading (boolean, optional. default: true). If incremental loading is enabled, config diff for the next execution will include `last_path` parameter so that next execution skips files before the path. Otherwise, `last_path` will not be included.
|
44
45
|
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
45
46
|
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
@@ -89,6 +90,21 @@ in:
|
|
89
90
|
out: {type: stdout}
|
90
91
|
```
|
91
92
|
|
93
|
+
To skip files using regexp:
|
94
|
+
|
95
|
+
```yaml
|
96
|
+
in:
|
97
|
+
type: gcs
|
98
|
+
bucket: my-gcs-bucket
|
99
|
+
path_prefix: logs/csv-
|
100
|
+
# ...
|
101
|
+
path_match_pattern: \.csv$ # a file will be skipped if its path doesn't match with this pattern
|
102
|
+
## some examples of regexp:
|
103
|
+
#path_match_pattern: /archive/ # match files in .../archive/... directory
|
104
|
+
#path_match_pattern: /data1/|/data2/ # match files in .../data1/... or .../data2/... directory
|
105
|
+
#path_match_pattern: \.csv$|\.csv\.gz$ # match files whose suffix is .csv or .csv.gz
|
106
|
+
```
|
107
|
+
|
92
108
|
## Authentication
|
93
109
|
|
94
110
|
There are three methods supported to fetch access token for the service account.
|
data/build.gradle
CHANGED
Binary file
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -0,0 +1,335 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonIgnore;
|
5
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.base.Throwables;
|
8
|
+
import org.embulk.config.Config;
|
9
|
+
import org.embulk.config.ConfigDefault;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
|
12
|
+
import java.io.BufferedInputStream;
|
13
|
+
import java.io.BufferedOutputStream;
|
14
|
+
import java.io.ByteArrayInputStream;
|
15
|
+
import java.io.ByteArrayOutputStream;
|
16
|
+
import java.io.IOException;
|
17
|
+
import java.io.InputStream;
|
18
|
+
import java.io.OutputStream;
|
19
|
+
import java.nio.ByteBuffer;
|
20
|
+
import java.nio.charset.StandardCharsets;
|
21
|
+
import java.util.AbstractList;
|
22
|
+
import java.util.ArrayList;
|
23
|
+
import java.util.List;
|
24
|
+
import java.util.regex.Pattern;
|
25
|
+
import java.util.zip.GZIPInputStream;
|
26
|
+
import java.util.zip.GZIPOutputStream;
|
27
|
+
|
28
|
+
public class FileList
|
29
|
+
{
|
30
|
+
public interface Task
|
31
|
+
{
|
32
|
+
@Config("path_match_pattern")
|
33
|
+
@ConfigDefault("\".*\"")
|
34
|
+
String getPathMatchPattern();
|
35
|
+
|
36
|
+
@Config("total_file_count_limit")
|
37
|
+
@ConfigDefault("2147483647")
|
38
|
+
int getTotalFileCountLimit();
|
39
|
+
|
40
|
+
// TODO support more algorithms to combine tasks
|
41
|
+
@Config("min_task_size")
|
42
|
+
@ConfigDefault("0")
|
43
|
+
long getMinTaskSize();
|
44
|
+
}
|
45
|
+
|
46
|
+
public static class Entry
|
47
|
+
{
|
48
|
+
private int index;
|
49
|
+
private long size;
|
50
|
+
|
51
|
+
@JsonCreator
|
52
|
+
public Entry(
|
53
|
+
@JsonProperty("index") int index,
|
54
|
+
@JsonProperty("size") long size)
|
55
|
+
{
|
56
|
+
this.index = index;
|
57
|
+
this.size = size;
|
58
|
+
}
|
59
|
+
|
60
|
+
@JsonProperty("index")
|
61
|
+
public int getIndex()
|
62
|
+
{
|
63
|
+
return index;
|
64
|
+
}
|
65
|
+
|
66
|
+
@JsonProperty("size")
|
67
|
+
public long getSize()
|
68
|
+
{
|
69
|
+
return size;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
public static class Builder
|
74
|
+
{
|
75
|
+
private final ByteArrayOutputStream binary;
|
76
|
+
private final OutputStream stream;
|
77
|
+
private final List<Entry> entries = new ArrayList<>();
|
78
|
+
private String last = null;
|
79
|
+
|
80
|
+
private int limitCount = Integer.MAX_VALUE;
|
81
|
+
private long minTaskSize = 1;
|
82
|
+
private Pattern pathMatchPattern;
|
83
|
+
|
84
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
85
|
+
|
86
|
+
public Builder(Task task)
|
87
|
+
{
|
88
|
+
this();
|
89
|
+
this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
|
90
|
+
this.limitCount = task.getTotalFileCountLimit();
|
91
|
+
this.minTaskSize = task.getMinTaskSize();
|
92
|
+
}
|
93
|
+
|
94
|
+
public Builder(ConfigSource config)
|
95
|
+
{
|
96
|
+
this();
|
97
|
+
this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
|
98
|
+
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
|
99
|
+
this.minTaskSize = config.get(long.class, "min_task_size", 0L);
|
100
|
+
}
|
101
|
+
|
102
|
+
public Builder()
|
103
|
+
{
|
104
|
+
binary = new ByteArrayOutputStream();
|
105
|
+
try {
|
106
|
+
stream = new BufferedOutputStream(new GZIPOutputStream(binary));
|
107
|
+
}
|
108
|
+
catch (IOException ex) {
|
109
|
+
throw Throwables.propagate(ex);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
public Builder limitTotalFileCount(int limitCount)
|
114
|
+
{
|
115
|
+
this.limitCount = limitCount;
|
116
|
+
return this;
|
117
|
+
}
|
118
|
+
|
119
|
+
public Builder minTaskSize(long bytes)
|
120
|
+
{
|
121
|
+
this.minTaskSize = bytes;
|
122
|
+
return this;
|
123
|
+
}
|
124
|
+
|
125
|
+
public Builder pathMatchPattern(String pattern)
|
126
|
+
{
|
127
|
+
this.pathMatchPattern = Pattern.compile(pattern);
|
128
|
+
return this;
|
129
|
+
}
|
130
|
+
|
131
|
+
public int size()
|
132
|
+
{
|
133
|
+
return entries.size();
|
134
|
+
}
|
135
|
+
|
136
|
+
public boolean needsMore()
|
137
|
+
{
|
138
|
+
return size() < limitCount;
|
139
|
+
}
|
140
|
+
|
141
|
+
// returns true if this file is used
|
142
|
+
public synchronized boolean add(String path, long size)
|
143
|
+
{
|
144
|
+
// TODO throw IllegalStateException if stream is already closed
|
145
|
+
|
146
|
+
if (!needsMore()) {
|
147
|
+
return false;
|
148
|
+
}
|
149
|
+
|
150
|
+
if (!pathMatchPattern.matcher(path).find()) {
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
int index = entries.size();
|
155
|
+
entries.add(new Entry(index, size));
|
156
|
+
|
157
|
+
byte[] data = path.getBytes(StandardCharsets.UTF_8);
|
158
|
+
castBuffer.putInt(0, data.length);
|
159
|
+
try {
|
160
|
+
stream.write(castBuffer.array());
|
161
|
+
stream.write(data);
|
162
|
+
}
|
163
|
+
catch (IOException ex) {
|
164
|
+
throw Throwables.propagate(ex);
|
165
|
+
}
|
166
|
+
|
167
|
+
last = path;
|
168
|
+
return true;
|
169
|
+
}
|
170
|
+
|
171
|
+
public FileList build()
|
172
|
+
{
|
173
|
+
try {
|
174
|
+
stream.close();
|
175
|
+
}
|
176
|
+
catch (IOException ex) {
|
177
|
+
throw Throwables.propagate(ex);
|
178
|
+
}
|
179
|
+
return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
|
180
|
+
}
|
181
|
+
|
182
|
+
private List<List<Entry>> getSplits(List<Entry> all)
|
183
|
+
{
|
184
|
+
List<List<Entry>> tasks = new ArrayList<>();
|
185
|
+
long currentTaskSize = 0;
|
186
|
+
List<Entry> currentTask = new ArrayList<>();
|
187
|
+
for (Entry entry : all) {
|
188
|
+
currentTask.add(entry);
|
189
|
+
currentTaskSize += entry.getSize(); // TODO consider to multiply the size by cost_per_byte, and add cost_per_file
|
190
|
+
if (currentTaskSize >= minTaskSize) {
|
191
|
+
tasks.add(currentTask);
|
192
|
+
currentTask = new ArrayList<>();
|
193
|
+
currentTaskSize = 0;
|
194
|
+
}
|
195
|
+
}
|
196
|
+
if (!currentTask.isEmpty()) {
|
197
|
+
tasks.add(currentTask);
|
198
|
+
}
|
199
|
+
return tasks;
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
private final byte[] data;
|
204
|
+
private final List<List<Entry>> tasks;
|
205
|
+
private final Optional<String> last;
|
206
|
+
|
207
|
+
@JsonCreator
|
208
|
+
@Deprecated
|
209
|
+
public FileList(
|
210
|
+
@JsonProperty("data") byte[] data,
|
211
|
+
@JsonProperty("tasks") List<List<Entry>> tasks,
|
212
|
+
@JsonProperty("last") Optional<String> last)
|
213
|
+
{
|
214
|
+
this.data = data;
|
215
|
+
this.tasks = tasks;
|
216
|
+
this.last = last;
|
217
|
+
}
|
218
|
+
|
219
|
+
@JsonIgnore
|
220
|
+
public Optional<String> getLastPath(Optional<String> lastLastPath)
|
221
|
+
{
|
222
|
+
if (last.isPresent()) {
|
223
|
+
return last;
|
224
|
+
}
|
225
|
+
return lastLastPath;
|
226
|
+
}
|
227
|
+
|
228
|
+
@JsonIgnore
|
229
|
+
public int getTaskCount()
|
230
|
+
{
|
231
|
+
return tasks.size();
|
232
|
+
}
|
233
|
+
|
234
|
+
@JsonIgnore
|
235
|
+
public List<String> get(int i)
|
236
|
+
{
|
237
|
+
return new EntryList(data, tasks.get(i));
|
238
|
+
}
|
239
|
+
|
240
|
+
@JsonProperty("data")
|
241
|
+
@Deprecated
|
242
|
+
public byte[] getData()
|
243
|
+
{
|
244
|
+
return data;
|
245
|
+
}
|
246
|
+
|
247
|
+
@JsonProperty("tasks")
|
248
|
+
@Deprecated
|
249
|
+
public List<List<Entry>> getTasks()
|
250
|
+
{
|
251
|
+
return tasks;
|
252
|
+
}
|
253
|
+
|
254
|
+
@JsonProperty("last")
|
255
|
+
@Deprecated
|
256
|
+
public Optional<String> getLast()
|
257
|
+
{
|
258
|
+
return last;
|
259
|
+
}
|
260
|
+
|
261
|
+
private class EntryList
|
262
|
+
extends AbstractList<String>
|
263
|
+
{
|
264
|
+
private final byte[] data;
|
265
|
+
private final List<Entry> entries;
|
266
|
+
private InputStream stream;
|
267
|
+
private int current;
|
268
|
+
|
269
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
270
|
+
|
271
|
+
public EntryList(byte[] data, List<Entry> entries)
|
272
|
+
{
|
273
|
+
this.data = data;
|
274
|
+
this.entries = entries;
|
275
|
+
try {
|
276
|
+
this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
277
|
+
}
|
278
|
+
catch (IOException ex) {
|
279
|
+
throw Throwables.propagate(ex);
|
280
|
+
}
|
281
|
+
this.current = 0;
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public synchronized String get(int i)
|
286
|
+
{
|
287
|
+
Entry e = entries.get(i);
|
288
|
+
if (e.getIndex() < current) {
|
289
|
+
// rewind to the head
|
290
|
+
try {
|
291
|
+
stream.close();
|
292
|
+
stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
293
|
+
}
|
294
|
+
catch (IOException ex) {
|
295
|
+
throw Throwables.propagate(ex);
|
296
|
+
}
|
297
|
+
current = 0;
|
298
|
+
}
|
299
|
+
|
300
|
+
while (current < e.getIndex()) {
|
301
|
+
readNext();
|
302
|
+
}
|
303
|
+
// now current == e.getIndex()
|
304
|
+
return readNextString();
|
305
|
+
}
|
306
|
+
|
307
|
+
@Override
|
308
|
+
public int size()
|
309
|
+
{
|
310
|
+
return entries.size();
|
311
|
+
}
|
312
|
+
|
313
|
+
private byte[] readNext()
|
314
|
+
{
|
315
|
+
try {
|
316
|
+
stream.read(castBuffer.array());
|
317
|
+
int n = castBuffer.getInt(0);
|
318
|
+
byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
|
319
|
+
stream.read(b);
|
320
|
+
|
321
|
+
current++;
|
322
|
+
|
323
|
+
return b;
|
324
|
+
}
|
325
|
+
catch (IOException ex) {
|
326
|
+
throw Throwables.propagate(ex);
|
327
|
+
}
|
328
|
+
}
|
329
|
+
|
330
|
+
private String readNextString()
|
331
|
+
{
|
332
|
+
return new String(readNext(), StandardCharsets.UTF_8);
|
333
|
+
}
|
334
|
+
}
|
335
|
+
}
|
@@ -0,0 +1,195 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.google.api.client.http.HttpResponseException;
|
4
|
+
import com.google.api.services.storage.Storage;
|
5
|
+
import com.google.api.services.storage.model.Bucket;
|
6
|
+
import com.google.api.services.storage.model.Objects;
|
7
|
+
import com.google.api.services.storage.model.StorageObject;
|
8
|
+
import com.google.common.base.Charsets;
|
9
|
+
import com.google.common.base.Function;
|
10
|
+
import com.google.common.base.Optional;
|
11
|
+
import com.google.common.io.BaseEncoding;
|
12
|
+
import org.embulk.config.ConfigException;
|
13
|
+
import org.embulk.config.TaskReport;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.TransactionalFileInput;
|
16
|
+
import org.embulk.spi.unit.LocalFile;
|
17
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
18
|
+
import org.slf4j.Logger;
|
19
|
+
|
20
|
+
import java.io.IOException;
|
21
|
+
import java.math.BigInteger;
|
22
|
+
import java.security.GeneralSecurityException;
|
23
|
+
import java.util.List;
|
24
|
+
|
25
|
+
public class GcsFileInput
|
26
|
+
extends InputStreamFileInput
|
27
|
+
implements TransactionalFileInput
|
28
|
+
{
|
29
|
+
private static final Logger log = Exec.getLogger(org.embulk.input.gcs.GcsFileInput.class);
|
30
|
+
|
31
|
+
public GcsFileInput(PluginTask task, int taskIndex)
|
32
|
+
{
|
33
|
+
super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
|
34
|
+
}
|
35
|
+
|
36
|
+
public void abort()
|
37
|
+
{
|
38
|
+
}
|
39
|
+
|
40
|
+
public TaskReport commit()
|
41
|
+
{
|
42
|
+
return Exec.newTaskReport();
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void close()
|
47
|
+
{
|
48
|
+
}
|
49
|
+
|
50
|
+
public static GcsAuthentication newGcsAuth(PluginTask task)
|
51
|
+
{
|
52
|
+
try {
|
53
|
+
return new GcsAuthentication(
|
54
|
+
task.getAuthMethod().getString(),
|
55
|
+
task.getServiceAccountEmail(),
|
56
|
+
task.getP12Keyfile().transform(localFileToPathString()),
|
57
|
+
task.getJsonKeyfile().transform(localFileToPathString()),
|
58
|
+
task.getApplicationName()
|
59
|
+
);
|
60
|
+
}
|
61
|
+
catch (GeneralSecurityException | IOException ex) {
|
62
|
+
throw new ConfigException(ex);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
protected static Storage newGcsClient(final PluginTask task, final GcsAuthentication auth)
|
67
|
+
{
|
68
|
+
Storage client = null;
|
69
|
+
try {
|
70
|
+
client = auth.getGcsClient(task.getBucket(), task.getMaxConnectionRetry());
|
71
|
+
}
|
72
|
+
catch (IOException ex) {
|
73
|
+
throw new ConfigException(ex);
|
74
|
+
}
|
75
|
+
|
76
|
+
return client;
|
77
|
+
}
|
78
|
+
|
79
|
+
private static Function<LocalFile, String> localFileToPathString()
|
80
|
+
{
|
81
|
+
return new Function<LocalFile, String>()
|
82
|
+
{
|
83
|
+
public String apply(LocalFile file)
|
84
|
+
{
|
85
|
+
return file.getPath().toString();
|
86
|
+
}
|
87
|
+
};
|
88
|
+
}
|
89
|
+
|
90
|
+
public static FileList listFiles(PluginTask task, Storage client)
|
91
|
+
{
|
92
|
+
String bucket = task.getBucket();
|
93
|
+
|
94
|
+
FileList.Builder builder = new FileList.Builder(task);
|
95
|
+
listGcsFilesByPrefix(builder, client, bucket, task.getPathPrefix().get(), task.getLastPath());
|
96
|
+
return builder.build();
|
97
|
+
}
|
98
|
+
|
99
|
+
/**
|
100
|
+
* Lists GCS filenames filtered by prefix.
|
101
|
+
*
|
102
|
+
* The resulting list does not include the file that's size == 0.
|
103
|
+
*/
|
104
|
+
public static void listGcsFilesByPrefix(FileList.Builder builder, Storage client, String bucket,
|
105
|
+
String prefix, Optional<String> lastPath)
|
106
|
+
{
|
107
|
+
String lastKey = lastPath.isPresent() ? base64Encode(lastPath.get()) : null;
|
108
|
+
|
109
|
+
// @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
|
110
|
+
if (log.isDebugEnabled()) {
|
111
|
+
try {
|
112
|
+
Storage.Buckets.Get getBucket = client.buckets().get(bucket);
|
113
|
+
getBucket.setProjection("full");
|
114
|
+
Bucket bk = getBucket.execute();
|
115
|
+
|
116
|
+
log.debug("bucket name: " + bucket);
|
117
|
+
log.debug("bucket location: " + bk.getLocation());
|
118
|
+
log.debug("bucket timeCreated: " + bk.getTimeCreated());
|
119
|
+
log.debug("bucket owner: " + bk.getOwner());
|
120
|
+
}
|
121
|
+
catch (IOException e) {
|
122
|
+
log.warn("Could not access to bucket:" + bucket);
|
123
|
+
log.warn(e.getMessage());
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
try {
|
128
|
+
// @see https://cloud.google.com/storage/docs/json_api/v1/objects/list
|
129
|
+
Storage.Objects.List listObjects = client.objects().list(bucket);
|
130
|
+
listObjects.setPrefix(prefix);
|
131
|
+
listObjects.setPageToken(lastKey);
|
132
|
+
do {
|
133
|
+
Objects objects = listObjects.execute();
|
134
|
+
List<StorageObject> items = objects.getItems();
|
135
|
+
if (items == null) {
|
136
|
+
log.info(String.format("No file was found in bucket:%s prefix:%s", bucket, prefix));
|
137
|
+
break;
|
138
|
+
}
|
139
|
+
for (StorageObject o : items) {
|
140
|
+
if (o.getSize().compareTo(BigInteger.ZERO) > 0) {
|
141
|
+
builder.add(o.getName(), o.getSize().longValue());
|
142
|
+
}
|
143
|
+
log.debug("filename: " + o.getName());
|
144
|
+
log.debug("updated: " + o.getUpdated());
|
145
|
+
}
|
146
|
+
lastKey = objects.getNextPageToken();
|
147
|
+
listObjects.setPageToken(lastKey);
|
148
|
+
} while (lastKey != null);
|
149
|
+
}
|
150
|
+
catch (IOException e) {
|
151
|
+
if ((e instanceof HttpResponseException) && ((HttpResponseException) e).getStatusCode() == 400) {
|
152
|
+
throw new ConfigException(String.format("Files listing failed: bucket:%s, prefix:%s, last_path:%s", bucket, prefix, lastKey), e);
|
153
|
+
}
|
154
|
+
|
155
|
+
log.warn(String.format("Could not get file list from bucket:%s", bucket));
|
156
|
+
log.warn(e.getMessage());
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
// String nextToken = base64Encode(0x0a + 0x01~0x27 + filePath);
|
161
|
+
private static String base64Encode(String path)
|
162
|
+
{
|
163
|
+
byte[] encoding;
|
164
|
+
byte[] utf8 = path.getBytes(Charsets.UTF_8);
|
165
|
+
log.debug(String.format("path string: %s ,path length:%s \" + ", path, utf8.length));
|
166
|
+
|
167
|
+
encoding = new byte[utf8.length + 2];
|
168
|
+
encoding[0] = 0x0a;
|
169
|
+
encoding[1] = new Byte(String.valueOf(path.length()));
|
170
|
+
System.arraycopy(utf8, 0, encoding, 2, utf8.length);
|
171
|
+
|
172
|
+
String s = BaseEncoding.base64().encode(encoding);
|
173
|
+
log.debug(String.format("last_path(base64 encoded): %s", s));
|
174
|
+
return s;
|
175
|
+
}
|
176
|
+
|
177
|
+
public enum AuthMethod
|
178
|
+
{
|
179
|
+
private_key("private_key"),
|
180
|
+
compute_engine("compute_engine"),
|
181
|
+
json_key("json_key");
|
182
|
+
|
183
|
+
private final String string;
|
184
|
+
|
185
|
+
AuthMethod(String string)
|
186
|
+
{
|
187
|
+
this.string = string;
|
188
|
+
}
|
189
|
+
|
190
|
+
public String getString()
|
191
|
+
{
|
192
|
+
return string;
|
193
|
+
}
|
194
|
+
}
|
195
|
+
}
|