embulk-output-gcs_streaming 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ package org.embulk.output.gcs_streaming;
2
+
3
+ import org.junit.BeforeClass;
4
+ import org.junit.Test;
5
+ import static org.junit.Assert.assertEquals;
6
+ import static org.junit.Assume.assumeNotNull;
7
+
8
+ import java.util.Optional;
9
+
10
+ public class TestGcsClient
11
+ {
12
+ private static String gcpProjectId;
13
+ private static Optional<String> gcpJsonKeyfile;
14
+ /*
15
+ * This test case requires environment variables
16
+ * GCP_PROJECT_ID
17
+ * GCP_JSON_KEYFILE
18
+ * And prepare gcloud authentication ADC, following command.
19
+ * $ gcloud auth application-default login
20
+ */
21
+ @BeforeClass
22
+ public static void initializeConstant()
23
+ {
24
+ gcpProjectId = System.getenv("GCP_PROJECT_ID");
25
+ gcpJsonKeyfile = Optional.of(System.getenv("GCP_JSON_KEYFILE"));
26
+ assumeNotNull(gcpJsonKeyfile, gcpProjectId);
27
+ // skip test cases, if environment variables are not set.
28
+ }
29
+
30
+ @Test
31
+ public void testGetStorageSuccess() throws RuntimeException
32
+ {
33
+ Optional<String> empty = Optional.empty();
34
+ GcsClient client = new GcsClient(gcpProjectId, empty);
35
+ }
36
+
37
+ @Test
38
+ public void testGetStorageSuccessFromJsonKeyfile() throws RuntimeException
39
+ {
40
+ GcsClient client = new GcsClient(gcpProjectId, gcpJsonKeyfile);
41
+ }
42
+
43
+ @Test(expected = RuntimeException.class)
44
+ public void testGetStorageFailFromJsonKeyfile() throws RuntimeException
45
+ {
46
+ Optional<String> notFoundJsonKeyfile = Optional.of("/path/to/key.json");
47
+ GcsClient client = new GcsClient(gcpProjectId, notFoundJsonKeyfile);
48
+ assertEquals(1, 2);
49
+ }
50
+ }
@@ -0,0 +1,305 @@
1
+ package org.embulk.output.gcs_streaming;
2
+
3
+ import com.google.cloud.storage.Blob;
4
+ import com.google.cloud.storage.BlobId;
5
+ import com.google.cloud.storage.Storage;
6
+ import com.google.cloud.storage.StorageOptions;
7
+ import com.google.common.collect.ImmutableList;
8
+ import com.google.common.collect.ImmutableMap;
9
+ import com.google.common.collect.Lists;
10
+ import com.google.common.io.Resources;
11
+
12
+ import org.embulk.EmbulkTestRuntime;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.config.TaskReport;
15
+ import org.embulk.config.TaskSource;
16
+
17
+ import org.embulk.output.gcs_streaming.GcsStreamingFileOutputPlugin.PluginTask;
18
+
19
+ import org.embulk.spi.Buffer;
20
+ import org.embulk.spi.Exec;
21
+ import org.embulk.spi.FileOutputRunner;
22
+ import org.embulk.spi.OutputPlugin;
23
+ import org.embulk.spi.Schema;
24
+ import org.embulk.spi.TransactionalFileOutput;
25
+ import org.embulk.standards.CsvParserPlugin;
26
+
27
+ import org.junit.Before;
28
+ import org.junit.BeforeClass;
29
+ import org.junit.Rule;
30
+ import org.junit.Test;
31
+
32
+ import static org.junit.Assert.assertEquals;
33
+ import static org.junit.Assume.assumeNotNull;
34
+
35
+ import java.io.BufferedReader;
36
+ import java.io.ByteArrayOutputStream;
37
+ import java.io.FileInputStream;
38
+ import java.io.InputStream;
39
+ import java.io.IOException;
40
+ import java.io.StringReader;
41
+ import java.util.Arrays;
42
+ import java.util.List;
43
+ import java.util.Optional;
44
+
45
+ public class TestGcsStreamingFileOutputPlugin
46
+ {
47
+ private static String gcpProjectId;
48
+ private static String gcpBucket;
49
+ private static Optional<String> gcpJsonKeyfile;
50
+ private static String localPathPrefix;
51
+ private FileOutputRunner runner;
52
+ /*
53
+ * This test case requires environment variables
54
+ * GCP_PROJECT_ID
55
+ * GCP_BUCKET
56
+ * GCP_JSON_KEYFILE
57
+ * And prepare gcloud authentication ADC, following command.
58
+ * $ gcloud auth application-default login
59
+ */
60
+ @BeforeClass
61
+ public static void initializeConstant()
62
+ {
63
+ gcpProjectId = System.getenv("GCP_PROJECT_ID");
64
+ gcpJsonKeyfile = Optional.of(System.getenv("GCP_JSON_KEYFILE"));
65
+ gcpBucket = System.getenv("GCP_BUCKET");
66
+ localPathPrefix = Resources.getResource("test.000.csv").getPath();
67
+ assumeNotNull(gcpJsonKeyfile, gcpProjectId, gcpBucket, localPathPrefix);
68
+ // skip test cases, if environment variables are not set.
69
+ }
70
+
71
+ @Rule
72
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
73
+ private GcsStreamingFileOutputPlugin plugin;
74
+
75
+ @Before
76
+ public void createResources()
77
+ {
78
+ plugin = new GcsStreamingFileOutputPlugin();
79
+ runner = new FileOutputRunner(runtime.getInstance(GcsStreamingFileOutputPlugin.class));
80
+ }
81
+
82
+ @Test
83
+ public void checkDefaultValues()
84
+ {
85
+ ConfigSource config = Exec.newConfigSource()
86
+ .set("in", inputConfig())
87
+ .set("parser", parserConfig(schemaConfig()))
88
+ .set("type", "gcs_streaming")
89
+ .set("project_id", gcpProjectId)
90
+ .set("bucket", gcpBucket)
91
+ .set("json_keyfile", gcpJsonKeyfile)
92
+ .set("path_prefix", "tests/data")
93
+ .set("file_ext", ".csv")
94
+ .set("formatter", formatterConfig());
95
+
96
+ PluginTask task = config.loadConfig(PluginTask.class);
97
+
98
+ assertEquals(gcpProjectId, task.getProjectId());
99
+ assertEquals(gcpBucket, task.getBucket());
100
+ assertEquals(gcpJsonKeyfile, task.getJsonKeyfile());
101
+ assertEquals(".%03d.%02d.", task.getSequenceFormat());
102
+ assertEquals("application/octet-stream", task.getContentType());
103
+ assertEquals("tests/data", task.getPathPrefix());
104
+ assertEquals(".csv", task.getFileNameExtension());
105
+ }
106
+
107
+ @Test
108
+ public void testTransaction()
109
+ {
110
+ ConfigSource config = Exec.newConfigSource()
111
+ .set("in", inputConfig())
112
+ .set("parser", parserConfig(schemaConfig()))
113
+ .set("type", "gcs_streaming")
114
+ .set("project_id", gcpProjectId)
115
+ .set("bucket", gcpBucket)
116
+ .set("path_prefix", "tests/data")
117
+ .set("file_ext", "csv")
118
+ .set("formatter", formatterConfig());
119
+
120
+ Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
121
+
122
+ runner.transaction(config, schema, 0, new Control());
123
+ }
124
+
125
+ @Test
126
+ public void testTransactionWithJsonKeyfile()
127
+ {
128
+ ConfigSource config = Exec.newConfigSource()
129
+ .set("in", inputConfig())
130
+ .set("parser", parserConfig(schemaConfig()))
131
+ .set("type", "gcs_streaming")
132
+ .set("project_id", gcpProjectId)
133
+ .set("bucket", gcpBucket)
134
+ .set("json_keyfile", gcpJsonKeyfile)
135
+ .set("path_prefix", "tests/data")
136
+ .set("file_ext", "csv")
137
+ .set("formatter", formatterConfig());
138
+
139
+ Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
140
+
141
+ runner.transaction(config, schema, 0, new Control());
142
+ }
143
+
144
+ @Test
145
+ public void testResume()
146
+ {
147
+ // no support resume
148
+ }
149
+
150
+ @Test
151
+ public void testCleanup()
152
+ {
153
+ PluginTask task = config().loadConfig(PluginTask.class);
154
+ plugin.cleanup(task.dump(), 0, Lists.newArrayList()); // no errors happens
155
+ }
156
+
157
+ @Test
158
+ public void testGcsFileOutputByOpen() throws Exception
159
+ {
160
+ ConfigSource configSource = config();
161
+ PluginTask task = configSource.loadConfig(PluginTask.class);
162
+ Schema schema = configSource.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
163
+ runner.transaction(configSource, schema, 0, new Control());
164
+
165
+ TransactionalFileOutput output = plugin.open(task.dump(), 0);
166
+
167
+ output.nextFile();
168
+
169
+ FileInputStream is = new FileInputStream(localPathPrefix);
170
+ byte[] bytes = convertInputStreamToByte(is);
171
+ Buffer buffer = Buffer.wrap(bytes);
172
+ output.add(buffer);
173
+
174
+ output.finish();
175
+ output.commit();
176
+
177
+ assertRecords(getFileContentsFromGcs(task));
178
+ }
179
+
180
+ public ConfigSource config()
181
+ {
182
+ return Exec.newConfigSource()
183
+ .set("in", inputConfig())
184
+ .set("parser", parserConfig(schemaConfig()))
185
+ .set("type", "gcs_streaming")
186
+ .set("project_id", gcpProjectId)
187
+ .set("bucket", gcpBucket)
188
+ .set("path_prefix", "tests/data")
189
+ .set("file_ext", "csv")
190
+ .set("formatter", formatterConfig());
191
+ }
192
+
193
+ private class Control implements OutputPlugin.Control
194
+ {
195
+ @Override
196
+ public List<TaskReport> run(TaskSource taskSource)
197
+ {
198
+ return Lists.newArrayList(Exec.newTaskReport());
199
+ }
200
+ }
201
+
202
+ private ImmutableMap<String, Object> inputConfig()
203
+ {
204
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
205
+ builder.put("type", "file");
206
+ builder.put("path_prefix", localPathPrefix);
207
+ builder.put("last_path", "");
208
+ return builder.build();
209
+ }
210
+
211
+ private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
212
+ {
213
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
214
+ builder.put("type", "csv");
215
+ builder.put("newline", "CRLF");
216
+ builder.put("delimiter", ",");
217
+ builder.put("quote", "\"");
218
+ builder.put("escape", "\"");
219
+ builder.put("trim_if_not_quoted", false);
220
+ builder.put("skip_header_lines", 1);
221
+ builder.put("allow_extra_columns", false);
222
+ builder.put("allow_optional_columns", false);
223
+ builder.put("columns", schemaConfig);
224
+ return builder.build();
225
+ }
226
+
227
+ private ImmutableList<Object> schemaConfig()
228
+ {
229
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
230
+ builder.add(ImmutableMap.of("name", "id", "type", "long"));
231
+ builder.add(ImmutableMap.of("name", "account", "type", "string"));
232
+ builder.add(ImmutableMap.of("name", "ts", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
233
+ builder.add(ImmutableMap.of("name", "dt", "type", "timestamp", "format", "%Y%m%d"));
234
+ builder.add(ImmutableMap.of("name", "message", "type", "string"));
235
+ return builder.build();
236
+ }
237
+
238
+ private ImmutableMap<String, Object> formatterConfig()
239
+ {
240
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
241
+ builder.put("type", "csv");
242
+ builder.put("header_line", true);
243
+ return builder.build();
244
+ }
245
+
246
+ private void assertRecords(ImmutableList<List<String>> records)
247
+ {
248
+ assertEquals(3, records.size());
249
+ {
250
+ List<String> record = records.get(1);
251
+ assertEquals("1", record.get(0));
252
+ assertEquals("account1", record.get(1));
253
+ assertEquals("2020-01-01 00:00:00", record.get(2));
254
+ assertEquals("20200101", record.get(3));
255
+ assertEquals("init", record.get(4));
256
+ }
257
+
258
+ {
259
+ List<String> record = records.get(2);
260
+ assertEquals("2", record.get(0));
261
+ assertEquals("account2", record.get(1));
262
+ assertEquals("2020-02-01 12:00:00", record.get(2));
263
+ assertEquals("20200201", record.get(3));
264
+ assertEquals("init", record.get(4));
265
+ }
266
+ }
267
+
268
+ private ImmutableList<List<String>> getFileContentsFromGcs(PluginTask task) throws Exception
269
+ {
270
+ Storage storage = StorageOptions.newBuilder()
271
+ .setProjectId(task.getProjectId())
272
+ .build()
273
+ .getService();
274
+
275
+ String blobName = task.getPathPrefix() + String.format(task.getSequenceFormat(), 0, 0) + task.getFileNameExtension();
276
+ BlobId blobId = BlobId.of(task.getBucket(), blobName);
277
+ Blob blob = storage.get(blobId);
278
+ byte[] byteContent = blob.getContent(); // one or multiple RPC calls will be issued
279
+ String strContent = new String(byteContent);
280
+
281
+ ImmutableList.Builder<List<String>> builder = new ImmutableList.Builder<>();
282
+
283
+ String line;
284
+ BufferedReader reader = new BufferedReader(new StringReader(strContent));
285
+ while ((line = reader.readLine()) != null) {
286
+ List<String> records = Arrays.asList(line.split(",", 0));
287
+ builder.add(records);
288
+ }
289
+ return builder.build();
290
+ }
291
+
292
+ private byte[] convertInputStreamToByte(InputStream is) throws IOException
293
+ {
294
+ ByteArrayOutputStream bo = new ByteArrayOutputStream();
295
+ byte [] buffer = new byte[1024];
296
+ while (true) {
297
+ int len = is.read(buffer);
298
+ if (len < 0) {
299
+ break;
300
+ }
301
+ bo.write(buffer, 0, len);
302
+ }
303
+ return bo.toByteArray();
304
+ }
305
+ }
@@ -0,0 +1,3 @@
1
+ id,account,ts,dt,message
2
+ 1,account1,2020-01-01 00:00:00,20200101,init
3
+ 2,account2,2020-02-01 12:00:00,20200201,init
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-gcs_streaming
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - irotoris
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-09-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '12.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ description: Stores files on Google Cloud Storage with streaming upload.
42
+ email:
43
+ - shiroto00@yahoo.co.jp
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - classpath/animal-sniffer-annotations-1.17.jar
53
+ - classpath/api-common-1.7.0.jar
54
+ - classpath/checker-compat-qual-2.5.2.jar
55
+ - classpath/commons-codec-1.10.jar
56
+ - classpath/commons-logging-1.2.jar
57
+ - classpath/embulk-output-gcs_streaming-0.1.0.jar
58
+ - classpath/error_prone_annotations-2.3.2.jar
59
+ - classpath/failureaccess-1.0.1.jar
60
+ - classpath/gax-1.44.0.jar
61
+ - classpath/gax-httpjson-0.61.0.jar
62
+ - classpath/google-api-client-1.27.0.jar
63
+ - classpath/google-api-services-storage-v1-rev20181109-1.27.0.jar
64
+ - classpath/google-auth-library-credentials-0.15.0.jar
65
+ - classpath/google-auth-library-oauth2-http-0.15.0.jar
66
+ - classpath/google-cloud-core-1.71.0.jar
67
+ - classpath/google-cloud-core-http-1.71.0.jar
68
+ - classpath/google-cloud-storage-1.71.0.jar
69
+ - classpath/google-http-client-1.29.0.jar
70
+ - classpath/google-http-client-apache-2.1.0.jar
71
+ - classpath/google-http-client-appengine-1.29.0.jar
72
+ - classpath/google-http-client-jackson2-1.29.0.jar
73
+ - classpath/google-oauth-client-1.27.0.jar
74
+ - classpath/grpc-context-1.18.0.jar
75
+ - classpath/gson-2.7.jar
76
+ - classpath/guava-27.1-android.jar
77
+ - classpath/httpclient-4.5.5.jar
78
+ - classpath/httpcore-4.4.9.jar
79
+ - classpath/j2objc-annotations-1.1.jar
80
+ - classpath/jackson-core-2.9.6.jar
81
+ - classpath/javax.annotation-api-1.3.2.jar
82
+ - classpath/jsr305-3.0.2.jar
83
+ - classpath/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar
84
+ - classpath/opencensus-api-0.19.2.jar
85
+ - classpath/opencensus-contrib-http-util-0.19.2.jar
86
+ - classpath/proto-google-common-protos-1.15.0.jar
87
+ - classpath/proto-google-iam-v1-0.12.0.jar
88
+ - classpath/protobuf-java-3.7.0.jar
89
+ - classpath/protobuf-java-util-3.7.0.jar
90
+ - classpath/threetenbp-1.3.3.jar
91
+ - config/checkstyle/checkstyle.xml
92
+ - config/checkstyle/default.xml
93
+ - gradle/wrapper/gradle-wrapper.jar
94
+ - gradle/wrapper/gradle-wrapper.properties
95
+ - gradlew
96
+ - gradlew.bat
97
+ - lib/embulk/output/gcs_streaming.rb
98
+ - src/main/java/org/embulk/output/gcs_streaming/GcsClient.java
99
+ - src/main/java/org/embulk/output/gcs_streaming/GcsStreamingFileOutputPlugin.java
100
+ - src/test/java/org/embulk/output/gcs_streaming/TestGcsClient.java
101
+ - src/test/java/org/embulk/output/gcs_streaming/TestGcsStreamingFileOutputPlugin.java
102
+ - src/test/resources/test.000.csv
103
+ homepage: https://github.com/irotoris/embulk-output-gcs_streaming
104
+ licenses:
105
+ - MIT
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 2.6.8
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: Gcs Streaming file output plugin for Embulk
127
+ test_files: []