embulk-filter-protobuf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
# Directory holding the plugin's jars, resolved relative to this file.
classpath = File.expand_path('../../../../classpath', __FILE__)

# Register the Java implementation class as the "protobuf" filter plugin.
Embulk::JavaPlugin.register_filter(
  "protobuf",
  "org.embulk.filter.protobuf.ProtobufFilterPlugin",
  classpath
)
@@ -0,0 +1,264 @@
1
+ package org.embulk.filter.protobuf;
2
+
3
+ import com.google.common.io.BaseEncoding;
4
+ import com.google.protobuf.InvalidProtocolBufferException;
5
+ import com.google.protobuf.Message;
6
+ import com.google.protobuf.MessageOrBuilder;
7
+ import com.google.protobuf.util.JsonFormat;
8
+
9
+ import org.embulk.filter.protobuf.ProtobufFilterPlugin.ColumnTask;
10
+ import org.embulk.filter.protobuf.ProtobufFilterPlugin.PluginTask;
11
+
12
+ import org.embulk.plugin.PluginClassLoader;
13
+ import org.embulk.spi.Column;
14
+ import org.embulk.spi.ColumnVisitor;
15
+ import org.embulk.spi.DataException;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.PageReader;
18
+
19
+ import java.lang.reflect.InvocationTargetException;
20
+ import java.lang.reflect.Method;
21
+ import java.net.URLClassLoader;
22
+ import java.nio.file.Path;
23
+ import java.nio.file.Paths;
24
+ import java.util.HashMap;
25
+ import java.util.List;
26
+ import java.util.Map;
27
+
28
+ public class ColumnVisitorImpl implements ColumnVisitor
29
+ {
30
+ private final PluginTask pluginTask;
31
+ private final PageReader pageReader;
32
+ private final PageBuilder pageBuilder;
33
+ private final Map<String, ColumnTask> columnTaskMap;
34
+
35
+ ColumnVisitorImpl(PluginTask task, PageReader reader, PageBuilder builder)
36
+ {
37
+ this.pluginTask = task;
38
+ this.pageReader = reader;
39
+ this.pageBuilder = builder;
40
+ this.columnTaskMap = getColumnMap(task.getColumns());
41
+ addProtobufJarToClasspath();
42
+ }
43
+
44
+ private Map<String, ColumnTask> getColumnMap(
45
+ List<ColumnTask> columnTasks)
46
+ {
47
+ Map<String, ColumnTask> m = new HashMap<>();
48
+ for (ColumnTask columnTask : columnTasks) {
49
+ m.put(columnTask.getName(), columnTask);
50
+ }
51
+ return m;
52
+ }
53
+
54
+ private void addProtobufJarToClasspath()
55
+ {
56
+ Path protobufJarPath = Paths.get(pluginTask.getProtobufJarPath());
57
+ // FIXME:
58
+ // getClass().getClassLoader() returns sun.misc.Launcher$AppClassLoader
59
+ // and it cannot be cast to PluginClassLoader in gradle test.
60
+ try {
61
+ PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
62
+ loader.addPath(protobufJarPath);
63
+ }
64
+ catch (ClassCastException e) {
65
+ }
66
+ }
67
+
68
+ private ColumnTask getColumnTask(Column column)
69
+ {
70
+ String colName = column.getName();
71
+ return columnTaskMap.get(colName);
72
+ }
73
+
74
+ private byte[] decodeMessage(String messageAsString)
75
+ {
76
+ byte[] decoded = null;
77
+ String encoding = pluginTask.getEncoding();
78
+ if (encoding.equals("Base64")) {
79
+ decoded = BaseEncoding.base64().decode(messageAsString);
80
+ }
81
+ return decoded;
82
+ }
83
+
84
+ private String encodeMessage(byte[] messageAsBytes)
85
+ {
86
+ String encoded = null;
87
+ String encoding = pluginTask.getEncoding();
88
+ if (encoding.equals("Base64")) {
89
+ encoded = BaseEncoding.base64().encode(messageAsBytes);
90
+ }
91
+ return encoded;
92
+ }
93
+
94
+ private String convertMessageBytesToJson(
95
+ byte[] messageAsBytes, String messageName)
96
+ {
97
+ URLClassLoader loader = (URLClassLoader) getClass().getClassLoader();
98
+ // Get a message object
99
+ Object message = null;
100
+ try {
101
+ Class<?> messageClass = loader.loadClass(messageName);
102
+ Method parseFrom = messageClass.getMethod(
103
+ "parseFrom", byte[].class);
104
+ message = parseFrom.invoke(
105
+ (Object) messageClass, (Object) messageAsBytes);
106
+ }
107
+ catch (ClassNotFoundException e) {
108
+ throw new DataException(e);
109
+ }
110
+ catch (NoSuchMethodException e) {
111
+ throw new DataException(e);
112
+ }
113
+ catch (IllegalAccessException e) {
114
+ throw new DataException(e);
115
+ }
116
+ catch (InvocationTargetException e) {
117
+ throw new DataException(e);
118
+ }
119
+ // Convert message object to json string
120
+ String messageAsString = null;
121
+ try {
122
+ messageAsString = JsonFormat.printer()
123
+ .omittingInsignificantWhitespace()
124
+ .print((MessageOrBuilder) message);
125
+ }
126
+ catch (InvalidProtocolBufferException e) {
127
+ throw new DataException(e);
128
+ }
129
+ return messageAsString;
130
+ }
131
+
132
+ private byte[] convertJsonToMessageBytes(
133
+ String messageAsJson, String messageName)
134
+ {
135
+ URLClassLoader loader = (URLClassLoader) getClass().getClassLoader();
136
+ // Get a message builder object
137
+ Message.Builder builder = null;
138
+ try {
139
+ Class<?> messageClass = loader.loadClass(messageName);
140
+ Method newBuilder = messageClass.getMethod("newBuilder");
141
+ builder = (Message.Builder) newBuilder.invoke(
142
+ (Object) messageClass);
143
+ }
144
+ catch (ClassNotFoundException e) {
145
+ throw new DataException(e);
146
+ }
147
+ catch (NoSuchMethodException e) {
148
+ throw new DataException(e);
149
+ }
150
+ catch (IllegalAccessException e) {
151
+ throw new DataException(e);
152
+ }
153
+ catch (InvocationTargetException e) {
154
+ throw new DataException(e);
155
+ }
156
+ // Convert message json to binary
157
+ byte[] messageAsBytes = null;
158
+ try {
159
+ JsonFormat.parser().merge(messageAsJson, builder);
160
+ messageAsBytes = builder.build().toByteArray();
161
+ }
162
+ catch (InvalidProtocolBufferException e) {
163
+ throw new DataException(e);
164
+ }
165
+ return messageAsBytes;
166
+ }
167
+
168
+ private String executeTask(ColumnTask colTask, Column column)
169
+ {
170
+ String messageName = colTask.getMessage();
171
+ // serialize
172
+ if (pluginTask.getDoSerialize().get()) {
173
+ String messageAsJson = pageReader.getString(column);
174
+ byte[] messageAsBytes = convertJsonToMessageBytes(
175
+ messageAsJson, messageName);
176
+ return encodeMessage(messageAsBytes);
177
+ }
178
+ // deserialize
179
+ else {
180
+ String messageAsString = pageReader.getString(column);
181
+ byte[] messageAsBytes = decodeMessage(messageAsString);
182
+ return convertMessageBytesToJson(messageAsBytes, messageName);
183
+ }
184
+ }
185
+
186
+ @Override
187
+ public void booleanColumn(Column outputColumn)
188
+ {
189
+ if (pageReader.isNull(outputColumn)) {
190
+ pageBuilder.setNull(outputColumn);
191
+ }
192
+ else {
193
+ pageBuilder.setBoolean(
194
+ outputColumn, pageReader.getBoolean(outputColumn));
195
+ }
196
+ }
197
+
198
+ @Override
199
+ public void longColumn(Column outputColumn)
200
+ {
201
+ if (pageReader.isNull(outputColumn)) {
202
+ pageBuilder.setNull(outputColumn);
203
+ }
204
+ else {
205
+ pageBuilder.setLong(
206
+ outputColumn, pageReader.getLong(outputColumn));
207
+ }
208
+ }
209
+
210
+ @Override
211
+ public void doubleColumn(Column outputColumn)
212
+ {
213
+ if (pageReader.isNull(outputColumn)) {
214
+ pageBuilder.setNull(outputColumn);
215
+ }
216
+ else {
217
+ pageBuilder.setDouble(
218
+ outputColumn, pageReader.getDouble(outputColumn));
219
+ }
220
+ }
221
+
222
+ @Override
223
+ public void stringColumn(Column outputColumn)
224
+ {
225
+ if (pageReader.isNull(outputColumn)) {
226
+ pageBuilder.setNull(outputColumn);
227
+ }
228
+ else {
229
+ ColumnTask task = getColumnTask(outputColumn);
230
+ if (task == null) {
231
+ pageBuilder.setString(
232
+ outputColumn, pageReader.getString(outputColumn));
233
+ }
234
+ else {
235
+ pageBuilder.setString(
236
+ outputColumn, executeTask(task, outputColumn));
237
+ }
238
+ }
239
+ }
240
+
241
+ @Override
242
+ public void timestampColumn(Column outputColumn)
243
+ {
244
+ if (pageReader.isNull(outputColumn)) {
245
+ pageBuilder.setNull(outputColumn);
246
+ }
247
+ else {
248
+ pageBuilder.setTimestamp(
249
+ outputColumn, pageReader.getTimestamp(outputColumn));
250
+ }
251
+ }
252
+
253
+ @Override
254
+ public void jsonColumn(Column outputColumn)
255
+ {
256
+ if (pageReader.isNull(outputColumn)) {
257
+ pageBuilder.setNull(outputColumn);
258
+ }
259
+ else {
260
+ pageBuilder.setJson(
261
+ outputColumn, pageReader.getJson(outputColumn));
262
+ }
263
+ }
264
+ }
@@ -0,0 +1,155 @@
1
+ package org.embulk.filter.protobuf;
2
+
3
+ import com.google.common.base.Optional;
4
+
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.spi.Column;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Page;
15
+ import org.embulk.spi.PageBuilder;
16
+ import org.embulk.spi.PageOutput;
17
+ import org.embulk.spi.PageReader;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.type.Types;
20
+
21
+ import java.nio.file.Path;
22
+ import java.nio.file.Paths;
23
+ import java.util.Arrays;
24
+ import java.util.List;
25
+
26
+ public class ProtobufFilterPlugin implements FilterPlugin
27
+ {
28
+ public interface PluginTask extends Task
29
+ {
30
+ @Config("serialize")
31
+ @ConfigDefault("false")
32
+ public Optional<Boolean> getDoSerialize();
33
+
34
+ @Config("deserialize")
35
+ @ConfigDefault("false")
36
+ public Optional<Boolean> getDoDeserialize();
37
+
38
+ @Config("encoding")
39
+ public String getEncoding();
40
+
41
+ @Config("protobuf_jar_path")
42
+ public String getProtobufJarPath();
43
+
44
+ @Config("columns")
45
+ public List<ColumnTask> getColumns();
46
+ }
47
+
48
+ public interface ColumnTask extends Task
49
+ {
50
+ @Config("name")
51
+ public String getName();
52
+
53
+ @Config("message")
54
+ public String getMessage();
55
+ }
56
+
57
+ public void validate(PluginTask pluginTask, Schema inputSchema)
58
+ {
59
+ // validate 'serialize' and 'deserialize' in PluginTask
60
+ boolean doSerialize = pluginTask.getDoSerialize().get();
61
+ boolean doDeserialize = pluginTask.getDoDeserialize().get();
62
+ boolean bothTrue = doSerialize && doDeserialize;
63
+ boolean bothFalse = !doSerialize && !doDeserialize;
64
+ if (bothTrue || bothFalse) {
65
+ String errMsg = "Specify either 'serialize: true' or 'deserialize: true'.";
66
+ throw new ConfigException(errMsg);
67
+ }
68
+ // validate 'encoding' in PluginTask
69
+ String[] allowedEncordings = {"Base64"};
70
+ String encoding = pluginTask.getEncoding();
71
+ if (!Arrays.asList(allowedEncordings).contains(encoding)) {
72
+ String errMsg = "Specify 'encoding: Base64'.";
73
+ throw new ConfigException(errMsg);
74
+ }
75
+ // validate 'protobuf_jar_path' in PluginTask
76
+ Path protobufJarPath = Paths.get(pluginTask.getProtobufJarPath());
77
+ if (!protobufJarPath.toFile().exists()) {
78
+ String errMsg = "The jar file does not exist.";
79
+ throw new ConfigException(errMsg);
80
+ }
81
+ // validate 'name' in ColumnTask
82
+ for (ColumnTask colTask : pluginTask.getColumns()) {
83
+ // throws exception when the column does not exist
84
+ Column column = inputSchema.lookupColumn(colTask.getName());
85
+ // TODO: accept both STRING and JSON type when 'serilialize': true
86
+ if (!Types.STRING.equals(column.getType())) {
87
+ String errMsg = "Type of input columns must be string.";
88
+ throw new ConfigException(errMsg);
89
+ }
90
+ }
91
+ }
92
+
93
+ @Override
94
+ public void transaction(ConfigSource config, Schema inputSchema,
95
+ FilterPlugin.Control control)
96
+ {
97
+ PluginTask task = config.loadConfig(PluginTask.class);
98
+ validate(task, inputSchema);
99
+ Schema outputSchema = inputSchema;
100
+ control.run(task.dump(), outputSchema);
101
+ }
102
+
103
+ @Override
104
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
105
+ Schema outputSchema, PageOutput output)
106
+ {
107
+ PluginTask task = taskSource.loadTask(PluginTask.class);
108
+ PageBuilder pageBuilder = new PageBuilder(
109
+ Exec.getBufferAllocator(), outputSchema, output);
110
+ PageReader pageReader = new PageReader(inputSchema);
111
+ ColumnVisitorImpl visitor = new ColumnVisitorImpl(
112
+ task, pageReader, pageBuilder);
113
+
114
+ return new PageOutputImpl(
115
+ pageReader, pageBuilder, outputSchema, visitor);
116
+ }
117
+
118
+ public static class PageOutputImpl implements PageOutput
119
+ {
120
+ private PageReader pageReader;
121
+ private PageBuilder pageBuilder;
122
+ private Schema outputSchema;
123
+ private ColumnVisitorImpl visitor;
124
+
125
+ PageOutputImpl(PageReader pageReader, PageBuilder pageBuilder, Schema outputSchema, ColumnVisitorImpl visitor)
126
+ {
127
+ this.pageReader = pageReader;
128
+ this.pageBuilder = pageBuilder;
129
+ this.outputSchema = outputSchema;
130
+ this.visitor = visitor;
131
+ }
132
+
133
+ @Override
134
+ public void add(Page page)
135
+ {
136
+ pageReader.setPage(page);
137
+ while (pageReader.nextRecord()) {
138
+ outputSchema.visitColumns(visitor);
139
+ pageBuilder.addRecord();
140
+ }
141
+ }
142
+
143
+ @Override
144
+ public void finish()
145
+ {
146
+ pageBuilder.finish();
147
+ }
148
+
149
+ @Override
150
+ public void close()
151
+ {
152
+ pageBuilder.close();
153
+ }
154
+ };
155
+ }
@@ -0,0 +1,117 @@
1
+ package org.embulk.filter.protobuf;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.filter.protobuf.ProtobufFilterPlugin.PageOutputImpl;
5
+ import org.embulk.filter.protobuf.ProtobufFilterPlugin.PluginTask;
6
+ import org.embulk.spi.Page;
7
+ import org.embulk.spi.PageBuilder;
8
+ import org.embulk.spi.PageOutput;
9
+ import org.embulk.spi.PageReader;
10
+ import org.embulk.spi.PageTestUtils;
11
+ import org.embulk.spi.Schema;
12
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
13
+ import org.embulk.spi.type.Types;
14
+ import org.embulk.spi.util.Pages;
15
+
16
+ import org.junit.Before;
17
+ import org.junit.Rule;
18
+ import org.junit.Test;
19
+ import static org.embulk.filter.protobuf.TestProtobufFilterPlugin.taskFromYamlString;
20
+ import static org.junit.Assert.assertEquals;
21
+
22
+ import java.io.File;
23
+ import java.util.List;
24
+
25
+ public class TestColumnVisitorImpl
26
+ {
27
+ @Rule
28
+ public EmbulkTestRuntime runtime;
29
+
30
+ private String protobufJarPath;
31
+
32
+ public TestColumnVisitorImpl()
33
+ {
34
+ this.runtime = new EmbulkTestRuntime();
35
+
36
+ String pluginBasePath = new File(".").getAbsoluteFile().getParent();
37
+ this.protobufJarPath = String.format(
38
+ "%s/example/AddressBookProtosProto3Syntax.jar",
39
+ pluginBasePath);
40
+ }
41
+
42
+ private List<Object[]> filter(
43
+ PluginTask task, Schema inputSchema, Object... objects)
44
+ {
45
+ MockPageOutput output = new MockPageOutput();
46
+ Schema outputSchema = inputSchema;
47
+ PageBuilder pageBuilder = new PageBuilder(
48
+ runtime.getBufferAllocator(), outputSchema, output);
49
+ PageReader pageReader = new PageReader(inputSchema);
50
+ ColumnVisitorImpl visitor = new ColumnVisitorImpl(
51
+ task, pageReader, pageBuilder);
52
+
53
+ List<Page> pages = PageTestUtils.buildPage(
54
+ runtime.getBufferAllocator(), inputSchema, objects);
55
+ PageOutput mockPageOutput = new PageOutputImpl(
56
+ pageReader, pageBuilder, outputSchema, visitor);
57
+ for (Page page : pages) {
58
+ mockPageOutput.add(page);
59
+ }
60
+ mockPageOutput.finish();
61
+ mockPageOutput.close();
62
+ return Pages.toObjects(outputSchema, output.pages);
63
+ }
64
+
65
+ @Test
66
+ public void testExecuteTask_serialize()
67
+ {
68
+ PluginTask task = taskFromYamlString(
69
+ "type: protobuf",
70
+ "serialize: true",
71
+ "encoding: Base64",
72
+ "protobuf_jar_path: " + protobufJarPath,
73
+ "columns:",
74
+ " - {name: to serialize, message: com.example.tutorial.AddressBookProtos$Person}"
75
+ );
76
+ Schema inputSchema = Schema.builder()
77
+ .add("to serialize", Types.STRING)
78
+ .build();
79
+ List<Object[]> records = filter(task, inputSchema,
80
+ // generated from proto2-syntax .proto
81
+ "{\"name\":\"John Doe\",\"id\":1234,\"email\":\"jdoe@example.com\",\"phone\":[{\"number\":\"111-0000\",\"type\":\"MOBILE\"},{\"number\":\"555-4321\",\"type\":\"HOME\"}]}",
82
+ // generated from proto3-syntax .proto
83
+ "{\"name\":\"John Doe\",\"id\":1234,\"email\":\"jdoe@example.com\",\"phone\":[{\"number\":\"111-0000\"},{\"number\":\"555-4321\",\"type\":\"HOME\"}]}"
84
+ );
85
+ assertEquals(2, records.size());
86
+ String expected = "CghKb2huIERvZRDSCRoQamRvZUBleGFtcGxlLmNvbSIKCggxMTEtMDAwMCIMCgg1NTUtNDMyMRAB";
87
+ assertEquals(expected, records.get(0)[0]);
88
+ assertEquals(expected, records.get(1)[0]);
89
+ }
90
+
91
+ @Test
92
+ public void testExecuteTask_deserialize()
93
+ {
94
+ PluginTask task = taskFromYamlString(
95
+ "type: protobuf",
96
+ "deserialize: true",
97
+ "encoding: Base64",
98
+ "protobuf_jar_path: " + protobufJarPath,
99
+ "columns:",
100
+ " - {name: to deserialize, message: com.example.tutorial.AddressBookProtos$Person}"
101
+ );
102
+ Schema inputSchema = Schema.builder()
103
+ .add("to deserialize", Types.STRING)
104
+ .build();
105
+ List<Object[]> records = filter(
106
+ task, inputSchema,
107
+ // generated from proto2-syntax .proto
108
+ "CghKb2huIERvZRDSCRoQamRvZUBleGFtcGxlLmNvbSIMCggxMTEtMDAwMBAAIgwKCDU1NS00MzIxEAE=",
109
+ // generated from proto3-syntax .proto
110
+ "CghKb2huIERvZRDSCRoQamRvZUBleGFtcGxlLmNvbSIKCggxMTEtMDAwMCIMCgg1NTUtNDMyMRAB"
111
+ );
112
+ assertEquals(2, records.size());
113
+ String expected = "{\"name\":\"John Doe\",\"id\":1234,\"email\":\"jdoe@example.com\",\"phone\":[{\"number\":\"111-0000\"},{\"number\":\"555-4321\",\"type\":\"HOME\"}]}";
114
+ assertEquals(expected, records.get(0)[0]);
115
+ assertEquals(expected, records.get(1)[0]);
116
+ }
117
+ }