embulk-input-gcs 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/CHANGELOG.md +3 -0
- data/README.md +16 -0
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.2.6.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/input/gcs/FileList.java +335 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInput.java +195 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +10 -362
- data/src/main/java/org/embulk/input/gcs/PluginTask.java +71 -0
- data/src/main/java/org/embulk/input/gcs/SingleFileProvider.java +143 -0
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +54 -21
- metadata +7 -3
- data/classpath/embulk-input-gcs-0.2.5.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04cb5b37d8fb8c70e1c9c4c306cf792ac1ad1ec9
|
4
|
+
data.tar.gz: 44ec9518fc188320a19ffbe2ce7db0b07cda30f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d2a59336002f07d48bcf8b5b22b3626e9c59df7aad93a4f91721667f6f7e1552bd1db72ebd1323e19ce1dc67f32da64b9ad88bc6d6785591904b8d87059249f
|
7
|
+
data.tar.gz: 1533eccae86b7355303ab91bc5ab08cf49c816438fef3541ad1318fc24f0eab948667132952746173a2dd3dabea41bc06cfc2493b8b4701c1908d2e07ff9b635
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.2.6 - 2018-03-05
|
2
|
+
* [maintenance] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
|
3
|
+
|
1
4
|
## 0.2.5 - 2017-05-19
|
2
5
|
* [maintenance] Fix InputStream handling so that the plugin no longer gets fewer records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
|
3
6
|
|
data/README.md
CHANGED
@@ -40,6 +40,7 @@ embulk run /path/to/config.yml
|
|
40
40
|
- **bucket** Google Cloud Storage bucket name (string, required)
|
41
41
|
- **path_prefix** prefix of target keys (string, either of "path_prefix" or "paths" is required)
|
42
42
|
- **paths** list of target keys (array of string, either of "path_prefix" or "paths" is required)
|
43
|
+
- **path_match_pattern**: regexp to match file paths. If a file path doesn't match this pattern, the file will be skipped (regexp string, optional)
|
43
44
|
- **incremental**: enables incremental loading (boolean, optional. default: true). If incremental loading is enabled, config diff for the next execution will include `last_path` parameter so that next execution skips files before the path. Otherwise, `last_path` will not be included.
|
44
45
|
- **auth_method** (string, optional, "private_key", "json_key" or "compute_engine". default value is "private_key")
|
45
46
|
- **service_account_email** Google Cloud Storage service_account_email (string, required when auth_method is private_key)
|
@@ -89,6 +90,21 @@ in:
|
|
89
90
|
out: {type: stdout}
|
90
91
|
```
|
91
92
|
|
93
|
+
To skip files using regexp:
|
94
|
+
|
95
|
+
```yaml
|
96
|
+
in:
|
97
|
+
type: gcs
|
98
|
+
bucket: my-gcs-bucket
|
99
|
+
path_prefix: logs/csv-
|
100
|
+
# ...
|
101
|
+
path_match_pattern: \.csv$ # a file will be skipped if its path doesn't match with this pattern
|
102
|
+
## some examples of regexp:
|
103
|
+
#path_match_pattern: /archive/ # match files in .../archive/... directory
|
104
|
+
#path_match_pattern: /data1/|/data2/ # match files in .../data1/... or .../data2/... directory
|
105
|
+
#path_match_pattern: \.csv$|\.csv\.gz$ # match files whose suffix is .csv or .csv.gz
|
106
|
+
```
|
107
|
+
|
92
108
|
## Authentication
|
93
109
|
|
94
110
|
There are three methods supported to fetch access token for the service account.
|
data/build.gradle
CHANGED
Binary file
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -0,0 +1,335 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonIgnore;
|
5
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.base.Throwables;
|
8
|
+
import org.embulk.config.Config;
|
9
|
+
import org.embulk.config.ConfigDefault;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
|
12
|
+
import java.io.BufferedInputStream;
|
13
|
+
import java.io.BufferedOutputStream;
|
14
|
+
import java.io.ByteArrayInputStream;
|
15
|
+
import java.io.ByteArrayOutputStream;
|
16
|
+
import java.io.IOException;
|
17
|
+
import java.io.InputStream;
|
18
|
+
import java.io.OutputStream;
|
19
|
+
import java.nio.ByteBuffer;
|
20
|
+
import java.nio.charset.StandardCharsets;
|
21
|
+
import java.util.AbstractList;
|
22
|
+
import java.util.ArrayList;
|
23
|
+
import java.util.List;
|
24
|
+
import java.util.regex.Pattern;
|
25
|
+
import java.util.zip.GZIPInputStream;
|
26
|
+
import java.util.zip.GZIPOutputStream;
|
27
|
+
|
28
|
+
public class FileList
|
29
|
+
{
|
30
|
+
public interface Task
|
31
|
+
{
|
32
|
+
@Config("path_match_pattern")
|
33
|
+
@ConfigDefault("\".*\"")
|
34
|
+
String getPathMatchPattern();
|
35
|
+
|
36
|
+
@Config("total_file_count_limit")
|
37
|
+
@ConfigDefault("2147483647")
|
38
|
+
int getTotalFileCountLimit();
|
39
|
+
|
40
|
+
// TODO support more algorithms to combine tasks
|
41
|
+
@Config("min_task_size")
|
42
|
+
@ConfigDefault("0")
|
43
|
+
long getMinTaskSize();
|
44
|
+
}
|
45
|
+
|
46
|
+
public static class Entry
|
47
|
+
{
|
48
|
+
private int index;
|
49
|
+
private long size;
|
50
|
+
|
51
|
+
@JsonCreator
|
52
|
+
public Entry(
|
53
|
+
@JsonProperty("index") int index,
|
54
|
+
@JsonProperty("size") long size)
|
55
|
+
{
|
56
|
+
this.index = index;
|
57
|
+
this.size = size;
|
58
|
+
}
|
59
|
+
|
60
|
+
@JsonProperty("index")
|
61
|
+
public int getIndex()
|
62
|
+
{
|
63
|
+
return index;
|
64
|
+
}
|
65
|
+
|
66
|
+
@JsonProperty("size")
|
67
|
+
public long getSize()
|
68
|
+
{
|
69
|
+
return size;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
public static class Builder
|
74
|
+
{
|
75
|
+
private final ByteArrayOutputStream binary;
|
76
|
+
private final OutputStream stream;
|
77
|
+
private final List<Entry> entries = new ArrayList<>();
|
78
|
+
private String last = null;
|
79
|
+
|
80
|
+
private int limitCount = Integer.MAX_VALUE;
|
81
|
+
private long minTaskSize = 1;
|
82
|
+
private Pattern pathMatchPattern;
|
83
|
+
|
84
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
85
|
+
|
86
|
+
public Builder(Task task)
|
87
|
+
{
|
88
|
+
this();
|
89
|
+
this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
|
90
|
+
this.limitCount = task.getTotalFileCountLimit();
|
91
|
+
this.minTaskSize = task.getMinTaskSize();
|
92
|
+
}
|
93
|
+
|
94
|
+
public Builder(ConfigSource config)
|
95
|
+
{
|
96
|
+
this();
|
97
|
+
this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
|
98
|
+
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
|
99
|
+
this.minTaskSize = config.get(long.class, "min_task_size", 0L);
|
100
|
+
}
|
101
|
+
|
102
|
+
public Builder()
|
103
|
+
{
|
104
|
+
binary = new ByteArrayOutputStream();
|
105
|
+
try {
|
106
|
+
stream = new BufferedOutputStream(new GZIPOutputStream(binary));
|
107
|
+
}
|
108
|
+
catch (IOException ex) {
|
109
|
+
throw Throwables.propagate(ex);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
public Builder limitTotalFileCount(int limitCount)
|
114
|
+
{
|
115
|
+
this.limitCount = limitCount;
|
116
|
+
return this;
|
117
|
+
}
|
118
|
+
|
119
|
+
public Builder minTaskSize(long bytes)
|
120
|
+
{
|
121
|
+
this.minTaskSize = bytes;
|
122
|
+
return this;
|
123
|
+
}
|
124
|
+
|
125
|
+
public Builder pathMatchPattern(String pattern)
|
126
|
+
{
|
127
|
+
this.pathMatchPattern = Pattern.compile(pattern);
|
128
|
+
return this;
|
129
|
+
}
|
130
|
+
|
131
|
+
public int size()
|
132
|
+
{
|
133
|
+
return entries.size();
|
134
|
+
}
|
135
|
+
|
136
|
+
public boolean needsMore()
|
137
|
+
{
|
138
|
+
return size() < limitCount;
|
139
|
+
}
|
140
|
+
|
141
|
+
// returns true if this file is used
|
142
|
+
public synchronized boolean add(String path, long size)
|
143
|
+
{
|
144
|
+
// TODO throw IllegalStateException if stream is already closed
|
145
|
+
|
146
|
+
if (!needsMore()) {
|
147
|
+
return false;
|
148
|
+
}
|
149
|
+
|
150
|
+
if (!pathMatchPattern.matcher(path).find()) {
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
int index = entries.size();
|
155
|
+
entries.add(new Entry(index, size));
|
156
|
+
|
157
|
+
byte[] data = path.getBytes(StandardCharsets.UTF_8);
|
158
|
+
castBuffer.putInt(0, data.length);
|
159
|
+
try {
|
160
|
+
stream.write(castBuffer.array());
|
161
|
+
stream.write(data);
|
162
|
+
}
|
163
|
+
catch (IOException ex) {
|
164
|
+
throw Throwables.propagate(ex);
|
165
|
+
}
|
166
|
+
|
167
|
+
last = path;
|
168
|
+
return true;
|
169
|
+
}
|
170
|
+
|
171
|
+
public FileList build()
|
172
|
+
{
|
173
|
+
try {
|
174
|
+
stream.close();
|
175
|
+
}
|
176
|
+
catch (IOException ex) {
|
177
|
+
throw Throwables.propagate(ex);
|
178
|
+
}
|
179
|
+
return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
|
180
|
+
}
|
181
|
+
|
182
|
+
private List<List<Entry>> getSplits(List<Entry> all)
|
183
|
+
{
|
184
|
+
List<List<Entry>> tasks = new ArrayList<>();
|
185
|
+
long currentTaskSize = 0;
|
186
|
+
List<Entry> currentTask = new ArrayList<>();
|
187
|
+
for (Entry entry : all) {
|
188
|
+
currentTask.add(entry);
|
189
|
+
currentTaskSize += entry.getSize(); // TODO consider to multiply the size by cost_per_byte, and add cost_per_file
|
190
|
+
if (currentTaskSize >= minTaskSize) {
|
191
|
+
tasks.add(currentTask);
|
192
|
+
currentTask = new ArrayList<>();
|
193
|
+
currentTaskSize = 0;
|
194
|
+
}
|
195
|
+
}
|
196
|
+
if (!currentTask.isEmpty()) {
|
197
|
+
tasks.add(currentTask);
|
198
|
+
}
|
199
|
+
return tasks;
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
private final byte[] data;
|
204
|
+
private final List<List<Entry>> tasks;
|
205
|
+
private final Optional<String> last;
|
206
|
+
|
207
|
+
@JsonCreator
|
208
|
+
@Deprecated
|
209
|
+
public FileList(
|
210
|
+
@JsonProperty("data") byte[] data,
|
211
|
+
@JsonProperty("tasks") List<List<Entry>> tasks,
|
212
|
+
@JsonProperty("last") Optional<String> last)
|
213
|
+
{
|
214
|
+
this.data = data;
|
215
|
+
this.tasks = tasks;
|
216
|
+
this.last = last;
|
217
|
+
}
|
218
|
+
|
219
|
+
@JsonIgnore
|
220
|
+
public Optional<String> getLastPath(Optional<String> lastLastPath)
|
221
|
+
{
|
222
|
+
if (last.isPresent()) {
|
223
|
+
return last;
|
224
|
+
}
|
225
|
+
return lastLastPath;
|
226
|
+
}
|
227
|
+
|
228
|
+
@JsonIgnore
|
229
|
+
public int getTaskCount()
|
230
|
+
{
|
231
|
+
return tasks.size();
|
232
|
+
}
|
233
|
+
|
234
|
+
@JsonIgnore
|
235
|
+
public List<String> get(int i)
|
236
|
+
{
|
237
|
+
return new EntryList(data, tasks.get(i));
|
238
|
+
}
|
239
|
+
|
240
|
+
@JsonProperty("data")
|
241
|
+
@Deprecated
|
242
|
+
public byte[] getData()
|
243
|
+
{
|
244
|
+
return data;
|
245
|
+
}
|
246
|
+
|
247
|
+
@JsonProperty("tasks")
|
248
|
+
@Deprecated
|
249
|
+
public List<List<Entry>> getTasks()
|
250
|
+
{
|
251
|
+
return tasks;
|
252
|
+
}
|
253
|
+
|
254
|
+
@JsonProperty("last")
|
255
|
+
@Deprecated
|
256
|
+
public Optional<String> getLast()
|
257
|
+
{
|
258
|
+
return last;
|
259
|
+
}
|
260
|
+
|
261
|
+
private class EntryList
|
262
|
+
extends AbstractList<String>
|
263
|
+
{
|
264
|
+
private final byte[] data;
|
265
|
+
private final List<Entry> entries;
|
266
|
+
private InputStream stream;
|
267
|
+
private int current;
|
268
|
+
|
269
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
270
|
+
|
271
|
+
public EntryList(byte[] data, List<Entry> entries)
|
272
|
+
{
|
273
|
+
this.data = data;
|
274
|
+
this.entries = entries;
|
275
|
+
try {
|
276
|
+
this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
277
|
+
}
|
278
|
+
catch (IOException ex) {
|
279
|
+
throw Throwables.propagate(ex);
|
280
|
+
}
|
281
|
+
this.current = 0;
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public synchronized String get(int i)
|
286
|
+
{
|
287
|
+
Entry e = entries.get(i);
|
288
|
+
if (e.getIndex() < current) {
|
289
|
+
// rewind to the head
|
290
|
+
try {
|
291
|
+
stream.close();
|
292
|
+
stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
293
|
+
}
|
294
|
+
catch (IOException ex) {
|
295
|
+
throw Throwables.propagate(ex);
|
296
|
+
}
|
297
|
+
current = 0;
|
298
|
+
}
|
299
|
+
|
300
|
+
while (current < e.getIndex()) {
|
301
|
+
readNext();
|
302
|
+
}
|
303
|
+
// now current == e.getIndex()
|
304
|
+
return readNextString();
|
305
|
+
}
|
306
|
+
|
307
|
+
@Override
|
308
|
+
public int size()
|
309
|
+
{
|
310
|
+
return entries.size();
|
311
|
+
}
|
312
|
+
|
313
|
+
private byte[] readNext()
|
314
|
+
{
|
315
|
+
try {
|
316
|
+
stream.read(castBuffer.array());
|
317
|
+
int n = castBuffer.getInt(0);
|
318
|
+
byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
|
319
|
+
stream.read(b);
|
320
|
+
|
321
|
+
current++;
|
322
|
+
|
323
|
+
return b;
|
324
|
+
}
|
325
|
+
catch (IOException ex) {
|
326
|
+
throw Throwables.propagate(ex);
|
327
|
+
}
|
328
|
+
}
|
329
|
+
|
330
|
+
private String readNextString()
|
331
|
+
{
|
332
|
+
return new String(readNext(), StandardCharsets.UTF_8);
|
333
|
+
}
|
334
|
+
}
|
335
|
+
}
|
@@ -0,0 +1,195 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.google.api.client.http.HttpResponseException;
|
4
|
+
import com.google.api.services.storage.Storage;
|
5
|
+
import com.google.api.services.storage.model.Bucket;
|
6
|
+
import com.google.api.services.storage.model.Objects;
|
7
|
+
import com.google.api.services.storage.model.StorageObject;
|
8
|
+
import com.google.common.base.Charsets;
|
9
|
+
import com.google.common.base.Function;
|
10
|
+
import com.google.common.base.Optional;
|
11
|
+
import com.google.common.io.BaseEncoding;
|
12
|
+
import org.embulk.config.ConfigException;
|
13
|
+
import org.embulk.config.TaskReport;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.TransactionalFileInput;
|
16
|
+
import org.embulk.spi.unit.LocalFile;
|
17
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
18
|
+
import org.slf4j.Logger;
|
19
|
+
|
20
|
+
import java.io.IOException;
|
21
|
+
import java.math.BigInteger;
|
22
|
+
import java.security.GeneralSecurityException;
|
23
|
+
import java.util.List;
|
24
|
+
|
25
|
+
public class GcsFileInput
|
26
|
+
extends InputStreamFileInput
|
27
|
+
implements TransactionalFileInput
|
28
|
+
{
|
29
|
+
private static final Logger log = Exec.getLogger(org.embulk.input.gcs.GcsFileInput.class);
|
30
|
+
|
31
|
+
public GcsFileInput(PluginTask task, int taskIndex)
|
32
|
+
{
|
33
|
+
super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
|
34
|
+
}
|
35
|
+
|
36
|
+
public void abort()
|
37
|
+
{
|
38
|
+
}
|
39
|
+
|
40
|
+
public TaskReport commit()
|
41
|
+
{
|
42
|
+
return Exec.newTaskReport();
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void close()
|
47
|
+
{
|
48
|
+
}
|
49
|
+
|
50
|
+
public static GcsAuthentication newGcsAuth(PluginTask task)
|
51
|
+
{
|
52
|
+
try {
|
53
|
+
return new GcsAuthentication(
|
54
|
+
task.getAuthMethod().getString(),
|
55
|
+
task.getServiceAccountEmail(),
|
56
|
+
task.getP12Keyfile().transform(localFileToPathString()),
|
57
|
+
task.getJsonKeyfile().transform(localFileToPathString()),
|
58
|
+
task.getApplicationName()
|
59
|
+
);
|
60
|
+
}
|
61
|
+
catch (GeneralSecurityException | IOException ex) {
|
62
|
+
throw new ConfigException(ex);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
protected static Storage newGcsClient(final PluginTask task, final GcsAuthentication auth)
|
67
|
+
{
|
68
|
+
Storage client = null;
|
69
|
+
try {
|
70
|
+
client = auth.getGcsClient(task.getBucket(), task.getMaxConnectionRetry());
|
71
|
+
}
|
72
|
+
catch (IOException ex) {
|
73
|
+
throw new ConfigException(ex);
|
74
|
+
}
|
75
|
+
|
76
|
+
return client;
|
77
|
+
}
|
78
|
+
|
79
|
+
private static Function<LocalFile, String> localFileToPathString()
|
80
|
+
{
|
81
|
+
return new Function<LocalFile, String>()
|
82
|
+
{
|
83
|
+
public String apply(LocalFile file)
|
84
|
+
{
|
85
|
+
return file.getPath().toString();
|
86
|
+
}
|
87
|
+
};
|
88
|
+
}
|
89
|
+
|
90
|
+
public static FileList listFiles(PluginTask task, Storage client)
|
91
|
+
{
|
92
|
+
String bucket = task.getBucket();
|
93
|
+
|
94
|
+
FileList.Builder builder = new FileList.Builder(task);
|
95
|
+
listGcsFilesByPrefix(builder, client, bucket, task.getPathPrefix().get(), task.getLastPath());
|
96
|
+
return builder.build();
|
97
|
+
}
|
98
|
+
|
99
|
+
/**
|
100
|
+
* Lists GCS filenames filtered by prefix.
|
101
|
+
*
|
102
|
+
* The resulting list does not include the file that's size == 0.
|
103
|
+
*/
|
104
|
+
public static void listGcsFilesByPrefix(FileList.Builder builder, Storage client, String bucket,
|
105
|
+
String prefix, Optional<String> lastPath)
|
106
|
+
{
|
107
|
+
String lastKey = lastPath.isPresent() ? base64Encode(lastPath.get()) : null;
|
108
|
+
|
109
|
+
// @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
|
110
|
+
if (log.isDebugEnabled()) {
|
111
|
+
try {
|
112
|
+
Storage.Buckets.Get getBucket = client.buckets().get(bucket);
|
113
|
+
getBucket.setProjection("full");
|
114
|
+
Bucket bk = getBucket.execute();
|
115
|
+
|
116
|
+
log.debug("bucket name: " + bucket);
|
117
|
+
log.debug("bucket location: " + bk.getLocation());
|
118
|
+
log.debug("bucket timeCreated: " + bk.getTimeCreated());
|
119
|
+
log.debug("bucket owner: " + bk.getOwner());
|
120
|
+
}
|
121
|
+
catch (IOException e) {
|
122
|
+
log.warn("Could not access to bucket:" + bucket);
|
123
|
+
log.warn(e.getMessage());
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
try {
|
128
|
+
// @see https://cloud.google.com/storage/docs/json_api/v1/objects/list
|
129
|
+
Storage.Objects.List listObjects = client.objects().list(bucket);
|
130
|
+
listObjects.setPrefix(prefix);
|
131
|
+
listObjects.setPageToken(lastKey);
|
132
|
+
do {
|
133
|
+
Objects objects = listObjects.execute();
|
134
|
+
List<StorageObject> items = objects.getItems();
|
135
|
+
if (items == null) {
|
136
|
+
log.info(String.format("No file was found in bucket:%s prefix:%s", bucket, prefix));
|
137
|
+
break;
|
138
|
+
}
|
139
|
+
for (StorageObject o : items) {
|
140
|
+
if (o.getSize().compareTo(BigInteger.ZERO) > 0) {
|
141
|
+
builder.add(o.getName(), o.getSize().longValue());
|
142
|
+
}
|
143
|
+
log.debug("filename: " + o.getName());
|
144
|
+
log.debug("updated: " + o.getUpdated());
|
145
|
+
}
|
146
|
+
lastKey = objects.getNextPageToken();
|
147
|
+
listObjects.setPageToken(lastKey);
|
148
|
+
} while (lastKey != null);
|
149
|
+
}
|
150
|
+
catch (IOException e) {
|
151
|
+
if ((e instanceof HttpResponseException) && ((HttpResponseException) e).getStatusCode() == 400) {
|
152
|
+
throw new ConfigException(String.format("Files listing failed: bucket:%s, prefix:%s, last_path:%s", bucket, prefix, lastKey), e);
|
153
|
+
}
|
154
|
+
|
155
|
+
log.warn(String.format("Could not get file list from bucket:%s", bucket));
|
156
|
+
log.warn(e.getMessage());
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
// String nextToken = base64Encode(0x0a + 0x01~0x27 + filePath);
|
161
|
+
private static String base64Encode(String path)
|
162
|
+
{
|
163
|
+
byte[] encoding;
|
164
|
+
byte[] utf8 = path.getBytes(Charsets.UTF_8);
|
165
|
+
log.debug(String.format("path string: %s ,path length:%s \" + ", path, utf8.length));
|
166
|
+
|
167
|
+
encoding = new byte[utf8.length + 2];
|
168
|
+
encoding[0] = 0x0a;
|
169
|
+
encoding[1] = new Byte(String.valueOf(path.length()));
|
170
|
+
System.arraycopy(utf8, 0, encoding, 2, utf8.length);
|
171
|
+
|
172
|
+
String s = BaseEncoding.base64().encode(encoding);
|
173
|
+
log.debug(String.format("last_path(base64 encoded): %s", s));
|
174
|
+
return s;
|
175
|
+
}
|
176
|
+
|
177
|
+
public enum AuthMethod
|
178
|
+
{
|
179
|
+
private_key("private_key"),
|
180
|
+
compute_engine("compute_engine"),
|
181
|
+
json_key("json_key");
|
182
|
+
|
183
|
+
private final String string;
|
184
|
+
|
185
|
+
AuthMethod(String string)
|
186
|
+
{
|
187
|
+
this.string = string;
|
188
|
+
}
|
189
|
+
|
190
|
+
public String getString()
|
191
|
+
{
|
192
|
+
return string;
|
193
|
+
}
|
194
|
+
}
|
195
|
+
}
|