embulk-parser-poi_excel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +120 -0
- data/build.gradle +77 -0
- data/classpath/commons-codec-1.9.jar +0 -0
- data/classpath/embulk-parser-poi_excel-0.1.0.jar +0 -0
- data/classpath/embulk-standards-0.7.5.jar +0 -0
- data/classpath/poi-3.13.jar +0 -0
- data/classpath/poi-ooxml-3.13.jar +0 -0
- data/classpath/poi-ooxml-schemas-3.13.jar +0 -0
- data/classpath/stax-api-1.0.1.jar +0 -0
- data/classpath/xmlbeans-2.6.0.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/guess/poi_excel.rb +61 -0
- data/lib/embulk/parser/poi_excel.rb +3 -0
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +39 -0
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +199 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/AbstractPoiExcelCellAttributeVisitor.java +133 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellCommentVisitor.java +68 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +117 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +205 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellVisitor.java +194 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColorVisitor.java +81 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnIndex.java +174 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +146 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +171 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorValue.java +63 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +54 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +41 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +54 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +54 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +63 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +73 -0
- data/src/test/java/org/embulk/parser/EmbulkPluginTester.java +176 -0
- data/src/test/java/org/embulk/parser/EmbulkTestFileInputPlugin.java +83 -0
- data/src/test/java/org/embulk/parser/EmbulkTestOutputPlugin.java +193 -0
- data/src/test/java/org/embulk/parser/EmbulkTestParserConfig.java +51 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +187 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +42 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +125 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +132 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +188 -0
- data/src/test/resources/org/embulk/parser/poi_excel/test1.xls +0 -0
- metadata +118 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
package org.embulk.parser.poi_excel.visitor.embulk;
|
2
|
+
|
3
|
+
import java.util.Date;
|
4
|
+
import java.util.TimeZone;
|
5
|
+
|
6
|
+
import org.apache.poi.ss.usermodel.DateUtil;
|
7
|
+
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
|
8
|
+
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
|
9
|
+
import org.embulk.spi.Column;
|
10
|
+
import org.embulk.spi.time.Timestamp;
|
11
|
+
import org.embulk.spi.time.TimestampParser;
|
12
|
+
import org.embulk.spi.util.Timestamps;
|
13
|
+
|
14
|
+
public class TimestampCellVisitor extends CellVisitor {
|
15
|
+
|
16
|
+
public TimestampCellVisitor(PoiExcelVisitorValue visitorValue) {
|
17
|
+
super(visitorValue);
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public void visitCellValueNumeric(Column column, Object source, double value) {
|
22
|
+
TimestampParser parser = getTimestampParser(column);
|
23
|
+
TimeZone tz = parser.getDefaultTimeZone().toTimeZone();
|
24
|
+
Date date = DateUtil.getJavaDate(value, tz);
|
25
|
+
pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(date.getTime()));
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public void visitCellValueString(Column column, Object source, String value) {
|
30
|
+
TimestampParser parser = getTimestampParser(column);
|
31
|
+
pageBuilder.setTimestamp(column, parser.parse(value));
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void visitCellValueBoolean(Column column, Object source, boolean value) {
|
36
|
+
throw new UnsupportedOperationException("unsupported conversion Excel boolean to Embulk timestamp.");
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public void visitCellValueError(Column column, Object source, int code) {
|
41
|
+
pageBuilder.setNull(column);
|
42
|
+
}
|
43
|
+
|
44
|
+
@Override
|
45
|
+
public void visitValueLong(Column column, Object source, long value) {
|
46
|
+
pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(value));
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void visitSheetName(Column column) {
|
51
|
+
throw new UnsupportedOperationException("unsupported conversion sheet_name to Embulk timestamp.");
|
52
|
+
}
|
53
|
+
|
54
|
+
@Override
|
55
|
+
public void visitRowNumber(Column column, int index1) {
|
56
|
+
throw new UnsupportedOperationException("unsupported conversion row_number to Embulk timestamp.");
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void visitColumnNumber(Column column, int index1) {
|
61
|
+
throw new UnsupportedOperationException("unsupported conversion column_number to Embulk timestamp.");
|
62
|
+
}
|
63
|
+
|
64
|
+
private TimestampParser[] timestampParsers;
|
65
|
+
|
66
|
+
protected final TimestampParser getTimestampParser(Column column) {
|
67
|
+
if (timestampParsers == null) {
|
68
|
+
PluginTask task = visitorValue.getPluginTask();
|
69
|
+
timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
|
70
|
+
}
|
71
|
+
return timestampParsers[column.getIndex()];
|
72
|
+
}
|
73
|
+
}
|
@@ -0,0 +1,176 @@
|
|
1
|
+
package org.embulk.parser;
|
2
|
+
|
3
|
+
import java.io.Closeable;
|
4
|
+
import java.io.File;
|
5
|
+
import java.net.URISyntaxException;
|
6
|
+
import java.net.URL;
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
import org.embulk.EmbulkEmbed;
|
11
|
+
import org.embulk.EmbulkEmbed.Bootstrap;
|
12
|
+
import org.embulk.config.ConfigLoader;
|
13
|
+
import org.embulk.config.ConfigSource;
|
14
|
+
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
|
15
|
+
import org.embulk.plugin.InjectedPluginSource;
|
16
|
+
import org.embulk.spi.InputPlugin;
|
17
|
+
import org.embulk.spi.OutputPlugin;
|
18
|
+
import org.embulk.spi.ParserPlugin;
|
19
|
+
|
20
|
+
import com.google.inject.Binder;
|
21
|
+
import com.google.inject.Module;
|
22
|
+
import com.google.inject.Provider;
|
23
|
+
|
24
|
+
// @see https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/test/java/org/embulk/input/mysql/EmbulkPluginTester.java
|
25
|
+
public class EmbulkPluginTester implements Closeable {
|
26
|
+
|
27
|
+
protected static class PluginDefinition {
|
28
|
+
public final Class<?> iface;
|
29
|
+
public final String name;
|
30
|
+
public final Class<?> impl;
|
31
|
+
|
32
|
+
public PluginDefinition(Class<?> iface, String name, Class<?> impl) {
|
33
|
+
this.iface = iface;
|
34
|
+
this.name = name;
|
35
|
+
this.impl = impl;
|
36
|
+
}
|
37
|
+
}
|
38
|
+
|
39
|
+
private final List<PluginDefinition> plugins = new ArrayList<>();
|
40
|
+
|
41
|
+
private EmbulkEmbed embulk;
|
42
|
+
|
43
|
+
private ConfigLoader configLoader;
|
44
|
+
|
45
|
+
private EmbulkTestFileInputPlugin embulkTestFileInputPlugin = new EmbulkTestFileInputPlugin();
|
46
|
+
|
47
|
+
private EmbulkTestOutputPlugin embulkTestOutputPlugin = new EmbulkTestOutputPlugin();
|
48
|
+
|
49
|
+
public EmbulkPluginTester() {
|
50
|
+
}
|
51
|
+
|
52
|
+
public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl) {
|
53
|
+
addPlugin(iface, name, impl);
|
54
|
+
}
|
55
|
+
|
56
|
+
public void addPlugin(Class<?> iface, String name, Class<?> impl) {
|
57
|
+
plugins.add(new PluginDefinition(iface, name, impl));
|
58
|
+
}
|
59
|
+
|
60
|
+
public void addParserPlugin(String name, Class<? extends ParserPlugin> impl) {
|
61
|
+
addPlugin(ParserPlugin.class, name, impl);
|
62
|
+
}
|
63
|
+
|
64
|
+
protected EmbulkEmbed getEmbulkEmbed() {
|
65
|
+
if (embulk == null) {
|
66
|
+
Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
|
67
|
+
bootstrap.addModules(new Module() {
|
68
|
+
@Override
|
69
|
+
public void configure(Binder binder) {
|
70
|
+
EmbulkPluginTester.this.configurePlugin(binder);
|
71
|
+
|
72
|
+
for (PluginDefinition plugin : plugins) {
|
73
|
+
InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl);
|
74
|
+
}
|
75
|
+
}
|
76
|
+
});
|
77
|
+
embulk = bootstrap.initializeCloseable();
|
78
|
+
}
|
79
|
+
return embulk;
|
80
|
+
}
|
81
|
+
|
82
|
+
protected void configurePlugin(Binder binder) {
|
83
|
+
// input plugins
|
84
|
+
InjectedPluginSource.registerPluginTo(binder, InputPlugin.class, EmbulkTestFileInputPlugin.TYPE,
|
85
|
+
EmbulkTestFileInputPlugin.class);
|
86
|
+
binder.bind(EmbulkTestFileInputPlugin.class).toProvider(new Provider<EmbulkTestFileInputPlugin>() {
|
87
|
+
|
88
|
+
@Override
|
89
|
+
public EmbulkTestFileInputPlugin get() {
|
90
|
+
return embulkTestFileInputPlugin;
|
91
|
+
}
|
92
|
+
});
|
93
|
+
|
94
|
+
// output plugins
|
95
|
+
InjectedPluginSource.registerPluginTo(binder, OutputPlugin.class, EmbulkTestOutputPlugin.TYPE,
|
96
|
+
EmbulkTestOutputPlugin.class);
|
97
|
+
binder.bind(EmbulkTestOutputPlugin.class).toProvider(new Provider<EmbulkTestOutputPlugin>() {
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public EmbulkTestOutputPlugin get() {
|
101
|
+
return embulkTestOutputPlugin;
|
102
|
+
}
|
103
|
+
});
|
104
|
+
}
|
105
|
+
|
106
|
+
public ConfigLoader getConfigLoader() {
|
107
|
+
if (configLoader == null) {
|
108
|
+
configLoader = getEmbulkEmbed().newConfigLoader();
|
109
|
+
}
|
110
|
+
return configLoader;
|
111
|
+
}
|
112
|
+
|
113
|
+
public ConfigSource newConfigSource() {
|
114
|
+
return getConfigLoader().newConfigSource();
|
115
|
+
}
|
116
|
+
|
117
|
+
public EmbulkTestParserConfig newParserConfig(String type) {
|
118
|
+
EmbulkTestParserConfig parser = new EmbulkTestParserConfig();
|
119
|
+
parser.setType(type);
|
120
|
+
return parser;
|
121
|
+
}
|
122
|
+
|
123
|
+
public List<OutputRecord> runParser(URL inFile, EmbulkTestParserConfig parser) {
|
124
|
+
File file;
|
125
|
+
try {
|
126
|
+
file = new File(inFile.toURI());
|
127
|
+
} catch (URISyntaxException e) {
|
128
|
+
throw new RuntimeException(e);
|
129
|
+
}
|
130
|
+
return runParser(file, parser);
|
131
|
+
}
|
132
|
+
|
133
|
+
public List<OutputRecord> runParser(File inFile, EmbulkTestParserConfig parser) {
|
134
|
+
ConfigSource in = newConfigSource();
|
135
|
+
in.set("type", "file");
|
136
|
+
in.set("path_prefix", inFile.getAbsolutePath());
|
137
|
+
in.set("parser", parser);
|
138
|
+
return runInput(in);
|
139
|
+
}
|
140
|
+
|
141
|
+
public List<OutputRecord> runParser(List<String> list, EmbulkTestParserConfig parser) {
|
142
|
+
ConfigSource in = newConfigSource();
|
143
|
+
in.set("type", EmbulkTestFileInputPlugin.TYPE);
|
144
|
+
in.set("parser", parser);
|
145
|
+
|
146
|
+
embulkTestFileInputPlugin.setText(list);
|
147
|
+
return runInput(in);
|
148
|
+
}
|
149
|
+
|
150
|
+
public List<OutputRecord> runInput(ConfigSource in) {
|
151
|
+
ConfigSource out = newConfigSource();
|
152
|
+
out.set("type", EmbulkTestOutputPlugin.TYPE);
|
153
|
+
|
154
|
+
embulkTestOutputPlugin.clearResult();
|
155
|
+
run(in, out);
|
156
|
+
return embulkTestOutputPlugin.getResult();
|
157
|
+
}
|
158
|
+
|
159
|
+
public void run(ConfigSource in, ConfigSource out) {
|
160
|
+
ConfigSource config = newConfigSource();
|
161
|
+
config.set("in", in);
|
162
|
+
config.set("out", out);
|
163
|
+
run(config);
|
164
|
+
}
|
165
|
+
|
166
|
+
public void run(ConfigSource config) {
|
167
|
+
getEmbulkEmbed().run(config);
|
168
|
+
}
|
169
|
+
|
170
|
+
@Override
|
171
|
+
public void close() {
|
172
|
+
if (embulk != null) {
|
173
|
+
embulk.destroy();
|
174
|
+
}
|
175
|
+
}
|
176
|
+
}
|
@@ -0,0 +1,83 @@
|
|
1
|
+
package org.embulk.parser;
|
2
|
+
|
3
|
+
import java.nio.charset.StandardCharsets;
|
4
|
+
import java.util.List;
|
5
|
+
|
6
|
+
import org.embulk.config.ConfigDiff;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.Task;
|
9
|
+
import org.embulk.config.TaskReport;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.spi.Buffer;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.FileInputPlugin;
|
14
|
+
import org.embulk.spi.TransactionalFileInput;
|
15
|
+
|
16
|
+
public class EmbulkTestFileInputPlugin implements FileInputPlugin {
|
17
|
+
|
18
|
+
public static final String TYPE = "EmbulkTestFileInputPlugin";
|
19
|
+
|
20
|
+
public interface PluginTask extends Task {
|
21
|
+
}
|
22
|
+
|
23
|
+
private List<String> list;
|
24
|
+
|
25
|
+
public void setText(List<String> list) {
|
26
|
+
this.list = list;
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control) {
|
31
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
32
|
+
|
33
|
+
int taskCount = 1;
|
34
|
+
return resume(task.dump(), taskCount, control);
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public ConfigDiff resume(TaskSource taskSource, int taskCount, FileInputPlugin.Control control) {
|
39
|
+
control.run(taskSource, taskCount);
|
40
|
+
return Exec.newConfigDiff();
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void cleanup(TaskSource taskSource, int taskCount, List<TaskReport> successTaskReports) {
|
45
|
+
}
|
46
|
+
|
47
|
+
@Override
|
48
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex) {
|
49
|
+
return new TransactionalFileInput() {
|
50
|
+
private boolean eof = false;
|
51
|
+
private int index = 0;
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public Buffer poll() {
|
55
|
+
if (index < list.size()) {
|
56
|
+
String s = list.get(index++) + "\n";
|
57
|
+
return Buffer.copyOf(s.getBytes(StandardCharsets.UTF_8));
|
58
|
+
}
|
59
|
+
|
60
|
+
eof = true;
|
61
|
+
return null;
|
62
|
+
}
|
63
|
+
|
64
|
+
@Override
|
65
|
+
public boolean nextFile() {
|
66
|
+
return !eof;
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public void close() {
|
71
|
+
}
|
72
|
+
|
73
|
+
@Override
|
74
|
+
public void abort() {
|
75
|
+
}
|
76
|
+
|
77
|
+
@Override
|
78
|
+
public TaskReport commit() {
|
79
|
+
return Exec.newTaskReport();
|
80
|
+
}
|
81
|
+
};
|
82
|
+
}
|
83
|
+
}
|
@@ -0,0 +1,193 @@
|
|
1
|
+
package org.embulk.parser;
|
2
|
+
|
3
|
+
import java.text.MessageFormat;
|
4
|
+
import java.util.LinkedHashMap;
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.Map;
|
7
|
+
import java.util.concurrent.CopyOnWriteArrayList;
|
8
|
+
|
9
|
+
import org.embulk.config.ConfigDiff;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
import org.embulk.config.Task;
|
12
|
+
import org.embulk.config.TaskReport;
|
13
|
+
import org.embulk.config.TaskSource;
|
14
|
+
import org.embulk.spi.Column;
|
15
|
+
import org.embulk.spi.ColumnVisitor;
|
16
|
+
import org.embulk.spi.Exec;
|
17
|
+
import org.embulk.spi.OutputPlugin;
|
18
|
+
import org.embulk.spi.Page;
|
19
|
+
import org.embulk.spi.PageReader;
|
20
|
+
import org.embulk.spi.Schema;
|
21
|
+
import org.embulk.spi.TransactionalPageOutput;
|
22
|
+
import org.embulk.spi.time.Timestamp;
|
23
|
+
import org.embulk.spi.time.TimestampFormatter;
|
24
|
+
|
25
|
+
public class EmbulkTestOutputPlugin implements OutputPlugin {
|
26
|
+
|
27
|
+
public static final String TYPE = "EmbulkTestOutputPlugin";
|
28
|
+
|
29
|
+
public interface PluginTask extends Task, TimestampFormatter.Task {
|
30
|
+
}
|
31
|
+
|
32
|
+
public static class OutputRecord {
|
33
|
+
private Map<String, Object> map = new LinkedHashMap<>();
|
34
|
+
|
35
|
+
public void set(String name, Object value) {
|
36
|
+
map.put(name, value);
|
37
|
+
}
|
38
|
+
|
39
|
+
public String getAsString(String name) {
|
40
|
+
try {
|
41
|
+
return (String) map.get(name);
|
42
|
+
} catch (Exception e) {
|
43
|
+
throw new RuntimeException(MessageFormat.format("name={0}", name), e);
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
public Long getAsLong(String name) {
|
48
|
+
try {
|
49
|
+
return (Long) map.get(name);
|
50
|
+
} catch (Exception e) {
|
51
|
+
throw new RuntimeException(MessageFormat.format("name={0}", name), e);
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
public Double getAsDouble(String name) {
|
56
|
+
try {
|
57
|
+
return (Double) map.get(name);
|
58
|
+
} catch (Exception e) {
|
59
|
+
throw new RuntimeException(MessageFormat.format("name={0}", name), e);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
public Boolean getAsBoolean(String name) {
|
64
|
+
try {
|
65
|
+
return (Boolean) map.get(name);
|
66
|
+
} catch (Exception e) {
|
67
|
+
throw new RuntimeException(MessageFormat.format("name={0}", name), e);
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
public Timestamp getAsTimestamp(String name) {
|
72
|
+
try {
|
73
|
+
return (Timestamp) map.get(name);
|
74
|
+
} catch (Exception e) {
|
75
|
+
throw new RuntimeException(MessageFormat.format("name={0}", name), e);
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public String toString() {
|
81
|
+
return map.toString();
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
private final List<OutputRecord> result = new CopyOnWriteArrayList<>();
|
86
|
+
|
87
|
+
@Override
|
88
|
+
public ConfigDiff transaction(ConfigSource config, Schema schema, int taskCount, OutputPlugin.Control control) {
|
89
|
+
final PluginTask task = config.loadConfig(PluginTask.class);
|
90
|
+
return resume(task.dump(), schema, taskCount, control);
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public ConfigDiff resume(TaskSource taskSource, Schema schema, int taskCount, OutputPlugin.Control control) {
|
95
|
+
control.run(taskSource);
|
96
|
+
return Exec.newConfigDiff();
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public void cleanup(TaskSource taskSource, Schema schema, int taskCount, List<TaskReport> successTaskReports) {
|
101
|
+
}
|
102
|
+
|
103
|
+
@Override
|
104
|
+
public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int taskIndex) {
|
105
|
+
return new TransactionalPageOutput() {
|
106
|
+
private final PageReader reader = new PageReader(schema);
|
107
|
+
|
108
|
+
@Override
|
109
|
+
public void add(Page page) {
|
110
|
+
reader.setPage(page);
|
111
|
+
while (reader.nextRecord()) {
|
112
|
+
final OutputRecord record = new OutputRecord();
|
113
|
+
for (Column column : schema.getColumns()) {
|
114
|
+
column.visit(new ColumnVisitor() {
|
115
|
+
|
116
|
+
@Override
|
117
|
+
public void timestampColumn(Column column) {
|
118
|
+
if (reader.isNull(column)) {
|
119
|
+
record.set(column.getName(), null);
|
120
|
+
return;
|
121
|
+
}
|
122
|
+
record.set(column.getName(), reader.getTimestamp(column));
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public void stringColumn(Column column) {
|
127
|
+
if (reader.isNull(column)) {
|
128
|
+
record.set(column.getName(), null);
|
129
|
+
return;
|
130
|
+
}
|
131
|
+
record.set(column.getName(), reader.getString(column));
|
132
|
+
}
|
133
|
+
|
134
|
+
@Override
|
135
|
+
public void longColumn(Column column) {
|
136
|
+
if (reader.isNull(column)) {
|
137
|
+
record.set(column.getName(), null);
|
138
|
+
return;
|
139
|
+
}
|
140
|
+
record.set(column.getName(), reader.getLong(column));
|
141
|
+
}
|
142
|
+
|
143
|
+
@Override
|
144
|
+
public void doubleColumn(Column column) {
|
145
|
+
if (reader.isNull(column)) {
|
146
|
+
record.set(column.getName(), null);
|
147
|
+
return;
|
148
|
+
}
|
149
|
+
record.set(column.getName(), reader.getDouble(column));
|
150
|
+
}
|
151
|
+
|
152
|
+
@Override
|
153
|
+
public void booleanColumn(Column column) {
|
154
|
+
if (reader.isNull(column)) {
|
155
|
+
record.set(column.getName(), null);
|
156
|
+
return;
|
157
|
+
}
|
158
|
+
record.set(column.getName(), reader.getBoolean(column));
|
159
|
+
}
|
160
|
+
});
|
161
|
+
}
|
162
|
+
result.add(record);
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
166
|
+
@Override
|
167
|
+
public void finish() {
|
168
|
+
}
|
169
|
+
|
170
|
+
@Override
|
171
|
+
public void close() {
|
172
|
+
reader.close();
|
173
|
+
}
|
174
|
+
|
175
|
+
@Override
|
176
|
+
public void abort() {
|
177
|
+
}
|
178
|
+
|
179
|
+
@Override
|
180
|
+
public TaskReport commit() {
|
181
|
+
return Exec.newTaskReport();
|
182
|
+
}
|
183
|
+
};
|
184
|
+
}
|
185
|
+
|
186
|
+
public void clearResult() {
|
187
|
+
result.clear();
|
188
|
+
}
|
189
|
+
|
190
|
+
public List<OutputRecord> getResult() {
|
191
|
+
return result;
|
192
|
+
}
|
193
|
+
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
package org.embulk.parser;
|
2
|
+
|
3
|
+
import java.util.ArrayList;
|
4
|
+
import java.util.HashMap;
|
5
|
+
import java.util.List;
|
6
|
+
|
7
|
+
/**
 * Map-backed parser configuration used by the tests.
 *
 * <p>
 * Entries are plain key/value pairs; the column definitions live in a list
 * stored under the {@code "columns"} key.
 */
@SuppressWarnings("serial")
public class EmbulkTestParserConfig extends HashMap<String, Object> {

    /**
     * Stores the parser type under the {@code "type"} key.
     */
    public void setType(String type) {
        set("type", type);
    }

    /**
     * Stores a value, or removes the key when the value is {@code null}.
     */
    public void set(String key, Object value) {
        if (value != null) {
            super.put(key, value);
            return;
        }
        super.remove(key);
    }

    /**
     * Returns the column list, creating and storing an empty one under
     * {@code "columns"} if none exists yet.
     */
    public List<EmbulkTestColumn> getColumns() {
        @SuppressWarnings("unchecked")
        List<EmbulkTestColumn> existing = (List<EmbulkTestColumn>) super.get("columns");
        if (existing != null) {
            return existing;
        }

        List<EmbulkTestColumn> created = new ArrayList<>();
        super.put("columns", created);
        return created;
    }

    /**
     * Appends a column with the given name and type and returns it so further
     * options can be set on it.
     */
    public EmbulkTestColumn addColumn(String name, String type) {
        EmbulkTestColumn added = new EmbulkTestColumn();
        added.set("name", name).set("type", type);
        getColumns().add(added);
        return added;
    }

    /**
     * Map-backed definition of a single column.
     */
    public static class EmbulkTestColumn extends HashMap<String, Object> {

        /**
         * Stores a value, or removes the key when the value is {@code null}.
         *
         * @return this column, to allow chained calls
         */
        public EmbulkTestColumn set(String key, Object value) {
            if (value != null) {
                super.put(key, value);
            } else {
                super.remove(key);
            }
            return this;
        }
    }
}
|