embulk-parser-poi_excel 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +4 -0
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +4 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +17 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +2 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +38 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +15 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +14 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +25 -2
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +26 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +78 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ab71cb044d2238a08719c80b1092f50cce9b84e
|
4
|
+
data.tar.gz: c02f1e64148806369e8aca0e11a86daba48f134f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0966c93aba6475e4c77a1a987be10a0413025bb96f7925b9f41413802ad9217c1366b06b1378369e7ee9dcad8b70ca843532cac70d606c08aa0553a9195f108e
|
7
|
+
data.tar.gz: 06a1f63b2ba99e9f88410e72d1a29b5b3eb3b221394674a1568b9d4ed32a3292a95e93ed6639c2f26b6be513ee0f15d0199e96ee7b35ff4ba9082947f48908c5
|
data/README.md
CHANGED
@@ -44,6 +44,7 @@ if omit **column_number** when **value** is `cell_value`, specified next column.
|
|
44
44
|
* **type**: Embulk column type. (string, required)
|
45
45
|
* **value**: value type. see below. (string, default: `cell_value`)
|
46
46
|
* **column_number**: Excel column number. see below. (string, default: next column)
|
47
|
+
* **numeric_format**: `String.format` pattern used when converting a numeric (double) value to string, such as `%4.2f`. (string, default: Java's Double.toString() with a trailing `.0` removed)
|
47
48
|
* **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string)
|
48
49
|
* **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`)
|
49
50
|
* **formula_handling** : processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`)
|
@@ -58,6 +59,8 @@ if omit **column_number** when **value** is `cell_value`, specified next column.
|
|
58
59
|
* `cell_style`: all cell style attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
59
60
|
* `cell_font`: all cell font attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
60
61
|
* `cell_comment`: all cell comment attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
62
|
+
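* `cell_type`: cell type. returns the result of POI's Cell.getCellType() (as a name such as `NUMERIC` or `STRING` when **type** is `string`, otherwise as the numeric type code).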
|
63
|
+
* `cell_cached_type`: cached formula result type. returns the result of POI's Cell.getCachedFormulaResultType() when the cell type is FORMULA; otherwise the same as `cell_type` (the result of Cell.getCellType()).
|
61
64
|
* `sheet_name`: sheet name.
|
62
65
|
* `row_number`: row number(1 origin).
|
63
66
|
* `column_number`: column number(1 origin).
|
data/build.gradle
CHANGED
@@ -11,6 +11,10 @@ public enum PoiExcelColumnValueType {
|
|
11
11
|
CELL_FONT(true, false),
|
12
12
|
/** cell comment */
|
13
13
|
CELL_COMMENT(true, false),
|
14
|
+
/** cell type */
|
15
|
+
CELL_TYPE(true, false),
|
16
|
+
/** cell CachedFormulaResultType */
|
17
|
+
CELL_CACHED_TYPE(true, false),
|
14
18
|
/** sheet name */
|
15
19
|
SHEET_NAME(false, false),
|
16
20
|
/** row number (1 origin) */
|
@@ -103,6 +103,10 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
103
103
|
}
|
104
104
|
|
105
105
|
public interface ColumnCommonOptionTask extends Task {
|
106
|
+
// format of numeric(double) to string
|
107
|
+
@Config("numeric_format")
|
108
|
+
@ConfigDefault("null")
|
109
|
+
public Optional<String> getNumericFormat();
|
106
110
|
|
107
111
|
// search merged cell if cellType=BLANK
|
108
112
|
@Config("search_merged_cell")
|
@@ -231,6 +231,23 @@ public class PoiExcelColumnBean {
|
|
231
231
|
return attributeName.get();
|
232
232
|
}
|
233
233
|
|
234
|
+
private CacheValue<String> numericFormat = new CacheValue<String>() {
|
235
|
+
|
236
|
+
@Override
|
237
|
+
protected Optional<String> getTaskValue(ColumnCommonOptionTask task) {
|
238
|
+
return task.getNumericFormat();
|
239
|
+
}
|
240
|
+
|
241
|
+
@Override
|
242
|
+
protected String getDefaultValue() {
|
243
|
+
return "";
|
244
|
+
}
|
245
|
+
};
|
246
|
+
|
247
|
+
public String getNumericFormat() {
|
248
|
+
return numericFormat.get();
|
249
|
+
}
|
250
|
+
|
234
251
|
private CacheValue<Boolean> searchMergedCell = new CacheValue<Boolean>() {
|
235
252
|
|
236
253
|
@Override
|
@@ -51,6 +51,8 @@ public class PoiExcelColumnIndex {
|
|
51
51
|
break;
|
52
52
|
case CELL_VALUE:
|
53
53
|
case CELL_FORMULA:
|
54
|
+
case CELL_TYPE:
|
55
|
+
case CELL_CACHED_TYPE:
|
54
56
|
case COLUMN_NUMBER:
|
55
57
|
log.info("column.name={} <- cell_column={}, value_type={}", column.getName(), c, valueType);
|
56
58
|
break;
|
@@ -0,0 +1,38 @@
|
|
1
|
+
package org.embulk.parser.poi_excel.visitor;
|
2
|
+
|
3
|
+
import org.apache.poi.ss.usermodel.Cell;
|
4
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
|
5
|
+
import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor;
|
6
|
+
import org.embulk.spi.Column;
|
7
|
+
import org.embulk.spi.PageBuilder;
|
8
|
+
import org.embulk.spi.type.StringType;
|
9
|
+
|
10
|
+
public class PoiExcelCellTypeVisitor {
|
11
|
+
protected final PoiExcelVisitorValue visitorValue;
|
12
|
+
protected final PageBuilder pageBuilder;
|
13
|
+
|
14
|
+
public PoiExcelCellTypeVisitor(PoiExcelVisitorValue visitorValue) {
|
15
|
+
this.visitorValue = visitorValue;
|
16
|
+
this.pageBuilder = visitorValue.getPageBuilder();
|
17
|
+
}
|
18
|
+
|
19
|
+
private static final String[] CELL_TYPE_STRING = { "NUMERIC", "STRING", "FORMULA", "BLANK", "BOOLEAN", "ERROR" };
|
20
|
+
|
21
|
+
public void visit(PoiExcelColumnBean bean, Cell cell, int cellType, CellVisitor visitor) {
|
22
|
+
assert cell != null;
|
23
|
+
|
24
|
+
Column column = bean.getColumn();
|
25
|
+
if (column.getType() instanceof StringType) {
|
26
|
+
String type;
|
27
|
+
if (0 <= cellType && cellType < CELL_TYPE_STRING.length) {
|
28
|
+
type = CELL_TYPE_STRING[cellType];
|
29
|
+
} else {
|
30
|
+
type = Integer.toString(cellType);
|
31
|
+
}
|
32
|
+
visitor.visitCellValueString(column, cell, type);
|
33
|
+
return;
|
34
|
+
}
|
35
|
+
|
36
|
+
visitor.visitCellValueNumeric(column, cell, cellType);
|
37
|
+
}
|
38
|
+
}
|
@@ -117,6 +117,16 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
|
|
117
117
|
case CELL_COMMENT:
|
118
118
|
visitCellComment(bean, cell, visitor);
|
119
119
|
return;
|
120
|
+
case CELL_TYPE:
|
121
|
+
visitCellType(bean, cell, cell.getCellType(), visitor);
|
122
|
+
return;
|
123
|
+
case CELL_CACHED_TYPE:
|
124
|
+
if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
|
125
|
+
visitCellType(bean, cell, cell.getCachedFormulaResultType(), visitor);
|
126
|
+
} else {
|
127
|
+
visitCellType(bean, cell, cell.getCellType(), visitor);
|
128
|
+
}
|
129
|
+
return;
|
120
130
|
default:
|
121
131
|
throw new UnsupportedOperationException(MessageFormat.format("unsupported value_type={0}", valueType));
|
122
132
|
}
|
@@ -153,4 +163,9 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
|
|
153
163
|
PoiExcelCellCommentVisitor delegator = factory.getPoiExcelCellCommentVisitor();
|
154
164
|
delegator.visit(bean, cell, visitor);
|
155
165
|
}
|
166
|
+
|
167
|
+
private void visitCellType(PoiExcelColumnBean bean, Cell cell, int cellType, CellVisitor visitor) {
|
168
|
+
PoiExcelCellTypeVisitor delegator = factory.getPoiExcelCellTypeVisitor();
|
169
|
+
delegator.visit(bean, cell, cellType, visitor);
|
170
|
+
}
|
156
171
|
}
|
@@ -159,6 +159,20 @@ public class PoiExcelVisitorFactory {
|
|
159
159
|
return new PoiExcelCellCommentVisitor(visitorValue);
|
160
160
|
}
|
161
161
|
|
162
|
+
// cell type
|
163
|
+
private PoiExcelCellTypeVisitor poiExcelCellTypeVisitor;
|
164
|
+
|
165
|
+
public final PoiExcelCellTypeVisitor getPoiExcelCellTypeVisitor() {
|
166
|
+
if (poiExcelCellTypeVisitor == null) {
|
167
|
+
poiExcelCellTypeVisitor = newPoiExcelCellTypeVisitor();
|
168
|
+
}
|
169
|
+
return poiExcelCellTypeVisitor;
|
170
|
+
}
|
171
|
+
|
172
|
+
protected PoiExcelCellTypeVisitor newPoiExcelCellTypeVisitor() {
|
173
|
+
return new PoiExcelCellTypeVisitor(visitorValue);
|
174
|
+
}
|
175
|
+
|
162
176
|
// ClientAnchor
|
163
177
|
private PoiExcelClientAnchorVisitor poiExcelClientAnchorVisitor;
|
164
178
|
|
@@ -1,8 +1,11 @@
|
|
1
1
|
package org.embulk.parser.poi_excel.visitor.embulk;
|
2
2
|
|
3
|
+
import java.text.MessageFormat;
|
4
|
+
|
3
5
|
import org.apache.poi.ss.usermodel.FormulaError;
|
4
6
|
import org.apache.poi.ss.usermodel.Sheet;
|
5
7
|
import org.apache.poi.ss.util.CellReference;
|
8
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
|
6
9
|
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
|
7
10
|
import org.embulk.spi.Column;
|
8
11
|
|
@@ -14,11 +17,31 @@ public class StringCellVisitor extends CellVisitor {
|
|
14
17
|
|
15
18
|
@Override
|
16
19
|
public void visitCellValueNumeric(Column column, Object source, double value) {
|
20
|
+
String s = toString(column, source, value);
|
21
|
+
pageBuilder.setString(column, s);
|
22
|
+
}
|
23
|
+
|
24
|
+
protected String toString(Column column, Object source, double value) {
|
25
|
+
String format = getNumericFormat(column);
|
26
|
+
if (!format.isEmpty()) {
|
27
|
+
try {
|
28
|
+
return String.format(format, value);
|
29
|
+
} catch (Exception e) {
|
30
|
+
throw new IllegalArgumentException(MessageFormat.format(
|
31
|
+
"illegal String.format for double. numeric_format=\"{0}\"", format), e);
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
17
35
|
String s = Double.toString(value);
|
18
36
|
if (s.endsWith(".0")) {
|
19
|
-
|
37
|
+
return s.substring(0, s.length() - 2);
|
20
38
|
}
|
21
|
-
|
39
|
+
return s;
|
40
|
+
}
|
41
|
+
|
42
|
+
protected String getNumericFormat(Column column) {
|
43
|
+
PoiExcelColumnBean bean = visitorValue.getColumnBean(column);
|
44
|
+
return bean.getNumericFormat();
|
22
45
|
}
|
23
46
|
|
24
47
|
@Override
|
@@ -73,6 +73,32 @@ public class TestPoiExcelParserPlugin {
|
|
73
73
|
assertThat(r.getAsTimestamp("timestamp"), is(timestamp));
|
74
74
|
}
|
75
75
|
|
76
|
+
@Theory
|
77
|
+
public void testNumricFormat(String excelFile) throws ParseException {
|
78
|
+
try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
|
79
|
+
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
|
80
|
+
|
81
|
+
EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
|
82
|
+
parser.set("sheets", Arrays.asList("test1"));
|
83
|
+
parser.set("skip_header_lines", 1);
|
84
|
+
parser.addColumn("value", "string").set("column_number", "C").set("numeric_format", "%.2f");
|
85
|
+
|
86
|
+
URL inFile = getClass().getResource(excelFile);
|
87
|
+
List<OutputRecord> result = tester.runParser(inFile, parser);
|
88
|
+
|
89
|
+
assertThat(result.size(), is(7));
|
90
|
+
checkNumricFormat(result, 0, "123.40");
|
91
|
+
checkNumricFormat(result, 1, "456.70");
|
92
|
+
checkNumricFormat(result, 2, "123.00");
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
private void checkNumricFormat(List<OutputRecord> result, int index, String s) {
|
97
|
+
OutputRecord r = result.get(index);
|
98
|
+
// System.out.println(r);
|
99
|
+
assertThat(r.getAsString("value"), is(s));
|
100
|
+
}
|
101
|
+
|
76
102
|
@Theory
|
77
103
|
public void testRowNumber(String excelFile) throws ParseException {
|
78
104
|
try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package org.embulk.parser.poi_excel;
|
2
|
+
|
3
|
+
import static org.hamcrest.CoreMatchers.is;
|
4
|
+
import static org.junit.Assert.assertThat;
|
5
|
+
|
6
|
+
import java.net.URL;
|
7
|
+
import java.text.ParseException;
|
8
|
+
import java.util.Arrays;
|
9
|
+
import java.util.List;
|
10
|
+
|
11
|
+
import org.apache.poi.ss.usermodel.Cell;
|
12
|
+
import org.embulk.parser.EmbulkPluginTester;
|
13
|
+
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
|
14
|
+
import org.embulk.parser.EmbulkTestParserConfig;
|
15
|
+
import org.junit.experimental.theories.DataPoints;
|
16
|
+
import org.junit.experimental.theories.Theories;
|
17
|
+
import org.junit.experimental.theories.Theory;
|
18
|
+
import org.junit.runner.RunWith;
|
19
|
+
|
20
|
+
@RunWith(Theories.class)
|
21
|
+
public class TestPoiExcelParserPlugin_cellType {
|
22
|
+
|
23
|
+
@DataPoints
|
24
|
+
public static String[] FILES = { "test1.xls", "test2.xlsx" };
|
25
|
+
|
26
|
+
@Theory
|
27
|
+
public void testCellType(String excelFile) throws ParseException {
|
28
|
+
try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
|
29
|
+
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
|
30
|
+
|
31
|
+
EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
|
32
|
+
parser.set("sheets", Arrays.asList("test1"));
|
33
|
+
parser.set("skip_header_lines", 3);
|
34
|
+
parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_type");
|
35
|
+
parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_type");
|
36
|
+
|
37
|
+
URL inFile = getClass().getResource(excelFile);
|
38
|
+
List<OutputRecord> result = tester.runParser(inFile, parser);
|
39
|
+
|
40
|
+
assertThat(result.size(), is(5));
|
41
|
+
check1(result, 0, Cell.CELL_TYPE_NUMERIC, "NUMERIC");
|
42
|
+
check1(result, 1, Cell.CELL_TYPE_STRING, "STRING");
|
43
|
+
check1(result, 2, Cell.CELL_TYPE_FORMULA, "FORMULA");
|
44
|
+
check1(result, 3, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
|
45
|
+
check1(result, 4, Cell.CELL_TYPE_FORMULA, "FORMULA");
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
@Theory
|
50
|
+
public void testCellCachedType(String excelFile) throws ParseException {
|
51
|
+
try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
|
52
|
+
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
|
53
|
+
|
54
|
+
EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
|
55
|
+
parser.set("sheets", Arrays.asList("test1"));
|
56
|
+
parser.set("skip_header_lines", 3);
|
57
|
+
parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_cached_type");
|
58
|
+
parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_cached_type");
|
59
|
+
|
60
|
+
URL inFile = getClass().getResource(excelFile);
|
61
|
+
List<OutputRecord> result = tester.runParser(inFile, parser);
|
62
|
+
|
63
|
+
assertThat(result.size(), is(5));
|
64
|
+
check1(result, 0, Cell.CELL_TYPE_NUMERIC, "NUMERIC");
|
65
|
+
check1(result, 1, Cell.CELL_TYPE_STRING, "STRING");
|
66
|
+
check1(result, 2, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
|
67
|
+
check1(result, 3, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
|
68
|
+
check1(result, 4, Cell.CELL_TYPE_ERROR, "ERROR");
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
private void check1(List<OutputRecord> result, int index, long l, String s) throws ParseException {
|
73
|
+
OutputRecord r = result.get(index);
|
74
|
+
// System.out.println(r);
|
75
|
+
assertThat(r.getAsLong("long"), is(l));
|
76
|
+
assertThat(r.getAsString("string"), is(s));
|
77
|
+
}
|
78
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-poi_excel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hishidama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-02-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,6 +64,7 @@ files:
|
|
64
64
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellCommentVisitor.java
|
65
65
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java
|
66
66
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java
|
67
|
+
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java
|
67
68
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java
|
68
69
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java
|
69
70
|
- src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColorVisitor.java
|
@@ -85,6 +86,7 @@ files:
|
|
85
86
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java
|
86
87
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java
|
87
88
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java
|
89
|
+
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java
|
88
90
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java
|
89
91
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java
|
90
92
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java
|
@@ -93,7 +95,7 @@ files:
|
|
93
95
|
- src/test/resources/org/embulk/parser/poi_excel/test1.xls
|
94
96
|
- src/test/resources/org/embulk/parser/poi_excel/test2.xlsx
|
95
97
|
- classpath/commons-codec-1.9.jar
|
96
|
-
- classpath/embulk-parser-poi_excel-0.1.6.jar
|
98
|
+
- classpath/embulk-parser-poi_excel-0.1.7.jar
|
97
99
|
- classpath/embulk-standards-0.7.5.jar
|
98
100
|
- classpath/poi-3.13.jar
|
99
101
|
- classpath/poi-ooxml-3.13.jar
|