embulk-parser-poi_excel 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a6920bea2e679cbcc53f02a2c70485ba3d4b350
4
- data.tar.gz: a83e9e1a2cb8ef78da34d7ef5602b66b3f2e48c8
3
+ metadata.gz: 3ab71cb044d2238a08719c80b1092f50cce9b84e
4
+ data.tar.gz: c02f1e64148806369e8aca0e11a86daba48f134f
5
5
  SHA512:
6
- metadata.gz: 33920df4d360e86b5609f3939c63762b2f1d9b87a16f1923cf59ab5f16060ce1a2070c860e66d46fd50631a6a08f7677c7f32cb3dfc33e2084f1aca6fab75fe7
7
- data.tar.gz: 043b272a531e072b07033c43a014021c8cf9e1e65a7e12f7079341cff35adf6dc429e30c3da3415f994698fa02aa401fd527771469094db93583408c081c2c31
6
+ metadata.gz: 0966c93aba6475e4c77a1a987be10a0413025bb96f7925b9f41413802ad9217c1366b06b1378369e7ee9dcad8b70ca843532cac70d606c08aa0553a9195f108e
7
+ data.tar.gz: 06a1f63b2ba99e9f88410e72d1a29b5b3eb3b221394674a1568b9d4ed32a3292a95e93ed6639c2f26b6be513ee0f15d0199e96ee7b35ff4ba9082947f48908c5
data/README.md CHANGED
@@ -44,6 +44,7 @@ if omit **column_number** when **value** is `cell_value`, specified next column.
44
44
  * **type**: Embulk column type. (string, required)
45
45
  * **value**: value type. see below. (string, default: `cell_value`)
46
46
  * **column_number**: Excel column number. see below. (string, default: next column)
47
+ * **numeric_format**: format string used to convert a numeric (double) value to a string, such as `%4.2f`. (string, default: Java's Double.toString() with a trailing `.0` removed)
47
48
  * **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string)
48
49
  * **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`)
49
50
  * **formula_handling** : processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`)
@@ -58,6 +59,8 @@ if omit **column_number** when **value** is `cell_value`, specified next column.
58
59
  * `cell_style`: all cell style attributes. returned json string. see **attribute_name**. (**type** required `string`)
59
60
  * `cell_font`: all cell font attributes. returned json string. see **attribute_name**. (**type** required `string`)
60
61
  * `cell_comment`: all cell comment attributes. returned json string. see **attribute_name**. (**type** required `string`)
62
+ * `cell_type`: cell type. returns the value of POI's Cell.getCellType().
63
+ * `cell_cached_type`: cached formula result type of the cell. returns POI's Cell.getCachedFormulaResultType() when the cell type is FORMULA; otherwise the same as `cell_type` (Cell.getCellType()).
61
64
  * `sheet_name`: sheet name.
62
65
  * `row_number`: row number(1 origin).
63
66
  * `column_number`: column number(1 origin).
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.6"
16
+ version = "0.1.7"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -11,6 +11,10 @@ public enum PoiExcelColumnValueType {
11
11
  CELL_FONT(true, false),
12
12
  /** cell comment */
13
13
  CELL_COMMENT(true, false),
14
+ /** cell type */
15
+ CELL_TYPE(true, false),
16
+ /** cell CachedFormulaResultType */
17
+ CELL_CACHED_TYPE(true, false),
14
18
  /** sheet name */
15
19
  SHEET_NAME(false, false),
16
20
  /** row number (1 origin) */
@@ -103,6 +103,10 @@ public class PoiExcelParserPlugin implements ParserPlugin {
103
103
  }
104
104
 
105
105
  public interface ColumnCommonOptionTask extends Task {
106
+ // format of numeric(double) to string
107
+ @Config("numeric_format")
108
+ @ConfigDefault("null")
109
+ public Optional<String> getNumericFormat();
106
110
 
107
111
  // search merged cell if cellType=BLANK
108
112
  @Config("search_merged_cell")
@@ -231,6 +231,23 @@ public class PoiExcelColumnBean {
231
231
  return attributeName.get();
232
232
  }
233
233
 
234
+ private CacheValue<String> numericFormat = new CacheValue<String>() {
235
+
236
+ @Override
237
+ protected Optional<String> getTaskValue(ColumnCommonOptionTask task) {
238
+ return task.getNumericFormat();
239
+ }
240
+
241
+ @Override
242
+ protected String getDefaultValue() {
243
+ return "";
244
+ }
245
+ };
246
+
247
+ public String getNumericFormat() {
248
+ return numericFormat.get();
249
+ }
250
+
234
251
  private CacheValue<Boolean> searchMergedCell = new CacheValue<Boolean>() {
235
252
 
236
253
  @Override
@@ -51,6 +51,8 @@ public class PoiExcelColumnIndex {
51
51
  break;
52
52
  case CELL_VALUE:
53
53
  case CELL_FORMULA:
54
+ case CELL_TYPE:
55
+ case CELL_CACHED_TYPE:
54
56
  case COLUMN_NUMBER:
55
57
  log.info("column.name={} <- cell_column={}, value_type={}", column.getName(), c, valueType);
56
58
  break;
@@ -0,0 +1,38 @@
1
+ package org.embulk.parser.poi_excel.visitor;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
5
+ import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor;
6
+ import org.embulk.spi.Column;
7
+ import org.embulk.spi.PageBuilder;
8
+ import org.embulk.spi.type.StringType;
9
+
10
+ public class PoiExcelCellTypeVisitor {
11
+ protected final PoiExcelVisitorValue visitorValue;
12
+ protected final PageBuilder pageBuilder;
13
+
14
+ public PoiExcelCellTypeVisitor(PoiExcelVisitorValue visitorValue) {
15
+ this.visitorValue = visitorValue;
16
+ this.pageBuilder = visitorValue.getPageBuilder();
17
+ }
18
+
19
+ private static final String[] CELL_TYPE_STRING = { "NUMERIC", "STRING", "FORMULA", "BLANK", "BOOLEAN", "ERROR" };
20
+
21
+ public void visit(PoiExcelColumnBean bean, Cell cell, int cellType, CellVisitor visitor) {
22
+ assert cell != null;
23
+
24
+ Column column = bean.getColumn();
25
+ if (column.getType() instanceof StringType) {
26
+ String type;
27
+ if (0 <= cellType && cellType < CELL_TYPE_STRING.length) {
28
+ type = CELL_TYPE_STRING[cellType];
29
+ } else {
30
+ type = Integer.toString(cellType);
31
+ }
32
+ visitor.visitCellValueString(column, cell, type);
33
+ return;
34
+ }
35
+
36
+ visitor.visitCellValueNumeric(column, cell, cellType);
37
+ }
38
+ }
@@ -117,6 +117,16 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
117
117
  case CELL_COMMENT:
118
118
  visitCellComment(bean, cell, visitor);
119
119
  return;
120
+ case CELL_TYPE:
121
+ visitCellType(bean, cell, cell.getCellType(), visitor);
122
+ return;
123
+ case CELL_CACHED_TYPE:
124
+ if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
125
+ visitCellType(bean, cell, cell.getCachedFormulaResultType(), visitor);
126
+ } else {
127
+ visitCellType(bean, cell, cell.getCellType(), visitor);
128
+ }
129
+ return;
120
130
  default:
121
131
  throw new UnsupportedOperationException(MessageFormat.format("unsupported value_type={0}", valueType));
122
132
  }
@@ -153,4 +163,9 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
153
163
  PoiExcelCellCommentVisitor delegator = factory.getPoiExcelCellCommentVisitor();
154
164
  delegator.visit(bean, cell, visitor);
155
165
  }
166
+
167
+ private void visitCellType(PoiExcelColumnBean bean, Cell cell, int cellType, CellVisitor visitor) {
168
+ PoiExcelCellTypeVisitor delegator = factory.getPoiExcelCellTypeVisitor();
169
+ delegator.visit(bean, cell, cellType, visitor);
170
+ }
156
171
  }
@@ -159,6 +159,20 @@ public class PoiExcelVisitorFactory {
159
159
  return new PoiExcelCellCommentVisitor(visitorValue);
160
160
  }
161
161
 
162
+ // cell type
163
+ private PoiExcelCellTypeVisitor poiExcelCellTypeVisitor;
164
+
165
+ public final PoiExcelCellTypeVisitor getPoiExcelCellTypeVisitor() {
166
+ if (poiExcelCellTypeVisitor == null) {
167
+ poiExcelCellTypeVisitor = newPoiExcelCellTypeVisitor();
168
+ }
169
+ return poiExcelCellTypeVisitor;
170
+ }
171
+
172
+ protected PoiExcelCellTypeVisitor newPoiExcelCellTypeVisitor() {
173
+ return new PoiExcelCellTypeVisitor(visitorValue);
174
+ }
175
+
162
176
  // ClientAnchor
163
177
  private PoiExcelClientAnchorVisitor poiExcelClientAnchorVisitor;
164
178
 
@@ -1,8 +1,11 @@
1
1
  package org.embulk.parser.poi_excel.visitor.embulk;
2
2
 
3
+ import java.text.MessageFormat;
4
+
3
5
  import org.apache.poi.ss.usermodel.FormulaError;
4
6
  import org.apache.poi.ss.usermodel.Sheet;
5
7
  import org.apache.poi.ss.util.CellReference;
8
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
6
9
  import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
7
10
  import org.embulk.spi.Column;
8
11
 
@@ -14,11 +17,31 @@ public class StringCellVisitor extends CellVisitor {
14
17
 
15
18
  @Override
16
19
  public void visitCellValueNumeric(Column column, Object source, double value) {
20
+ String s = toString(column, source, value);
21
+ pageBuilder.setString(column, s);
22
+ }
23
+
24
+ protected String toString(Column column, Object source, double value) {
25
+ String format = getNumericFormat(column);
26
+ if (!format.isEmpty()) {
27
+ try {
28
+ return String.format(format, value);
29
+ } catch (Exception e) {
30
+ throw new IllegalArgumentException(MessageFormat.format(
31
+ "illegal String.format for double. numeric_format=\"{0}\"", format), e);
32
+ }
33
+ }
34
+
17
35
  String s = Double.toString(value);
18
36
  if (s.endsWith(".0")) {
19
- s = s.substring(0, s.length() - 2);
37
+ return s.substring(0, s.length() - 2);
20
38
  }
21
- pageBuilder.setString(column, s);
39
+ return s;
40
+ }
41
+
42
+ protected String getNumericFormat(Column column) {
43
+ PoiExcelColumnBean bean = visitorValue.getColumnBean(column);
44
+ return bean.getNumericFormat();
22
45
  }
23
46
 
24
47
  @Override
@@ -73,6 +73,32 @@ public class TestPoiExcelParserPlugin {
73
73
  assertThat(r.getAsTimestamp("timestamp"), is(timestamp));
74
74
  }
75
75
 
76
+ @Theory
77
+ public void testNumricFormat(String excelFile) throws ParseException {
78
+ try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
79
+ tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
80
+
81
+ EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
82
+ parser.set("sheets", Arrays.asList("test1"));
83
+ parser.set("skip_header_lines", 1);
84
+ parser.addColumn("value", "string").set("column_number", "C").set("numeric_format", "%.2f");
85
+
86
+ URL inFile = getClass().getResource(excelFile);
87
+ List<OutputRecord> result = tester.runParser(inFile, parser);
88
+
89
+ assertThat(result.size(), is(7));
90
+ checkNumricFormat(result, 0, "123.40");
91
+ checkNumricFormat(result, 1, "456.70");
92
+ checkNumricFormat(result, 2, "123.00");
93
+ }
94
+ }
95
+
96
+ private void checkNumricFormat(List<OutputRecord> result, int index, String s) {
97
+ OutputRecord r = result.get(index);
98
+ // System.out.println(r);
99
+ assertThat(r.getAsString("value"), is(s));
100
+ }
101
+
76
102
  @Theory
77
103
  public void testRowNumber(String excelFile) throws ParseException {
78
104
  try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
@@ -0,0 +1,78 @@
1
+ package org.embulk.parser.poi_excel;
2
+
3
+ import static org.hamcrest.CoreMatchers.is;
4
+ import static org.junit.Assert.assertThat;
5
+
6
+ import java.net.URL;
7
+ import java.text.ParseException;
8
+ import java.util.Arrays;
9
+ import java.util.List;
10
+
11
+ import org.apache.poi.ss.usermodel.Cell;
12
+ import org.embulk.parser.EmbulkPluginTester;
13
+ import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
14
+ import org.embulk.parser.EmbulkTestParserConfig;
15
+ import org.junit.experimental.theories.DataPoints;
16
+ import org.junit.experimental.theories.Theories;
17
+ import org.junit.experimental.theories.Theory;
18
+ import org.junit.runner.RunWith;
19
+
20
+ @RunWith(Theories.class)
21
+ public class TestPoiExcelParserPlugin_cellType {
22
+
23
+ @DataPoints
24
+ public static String[] FILES = { "test1.xls", "test2.xlsx" };
25
+
26
+ @Theory
27
+ public void testCellType(String excelFile) throws ParseException {
28
+ try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
29
+ tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
30
+
31
+ EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
32
+ parser.set("sheets", Arrays.asList("test1"));
33
+ parser.set("skip_header_lines", 3);
34
+ parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_type");
35
+ parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_type");
36
+
37
+ URL inFile = getClass().getResource(excelFile);
38
+ List<OutputRecord> result = tester.runParser(inFile, parser);
39
+
40
+ assertThat(result.size(), is(5));
41
+ check1(result, 0, Cell.CELL_TYPE_NUMERIC, "NUMERIC");
42
+ check1(result, 1, Cell.CELL_TYPE_STRING, "STRING");
43
+ check1(result, 2, Cell.CELL_TYPE_FORMULA, "FORMULA");
44
+ check1(result, 3, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
45
+ check1(result, 4, Cell.CELL_TYPE_FORMULA, "FORMULA");
46
+ }
47
+ }
48
+
49
+ @Theory
50
+ public void testCellCachedType(String excelFile) throws ParseException {
51
+ try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
52
+ tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
53
+
54
+ EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
55
+ parser.set("sheets", Arrays.asList("test1"));
56
+ parser.set("skip_header_lines", 3);
57
+ parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_cached_type");
58
+ parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_cached_type");
59
+
60
+ URL inFile = getClass().getResource(excelFile);
61
+ List<OutputRecord> result = tester.runParser(inFile, parser);
62
+
63
+ assertThat(result.size(), is(5));
64
+ check1(result, 0, Cell.CELL_TYPE_NUMERIC, "NUMERIC");
65
+ check1(result, 1, Cell.CELL_TYPE_STRING, "STRING");
66
+ check1(result, 2, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
67
+ check1(result, 3, Cell.CELL_TYPE_BOOLEAN, "BOOLEAN");
68
+ check1(result, 4, Cell.CELL_TYPE_ERROR, "ERROR");
69
+ }
70
+ }
71
+
72
+ private void check1(List<OutputRecord> result, int index, long l, String s) throws ParseException {
73
+ OutputRecord r = result.get(index);
74
+ // System.out.println(r);
75
+ assertThat(r.getAsLong("long"), is(l));
76
+ assertThat(r.getAsString("string"), is(s));
77
+ }
78
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-poi_excel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - hishidama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-28 00:00:00.000000000 Z
11
+ date: 2017-02-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -64,6 +64,7 @@ files:
64
64
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellCommentVisitor.java
65
65
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java
66
66
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java
67
+ - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java
67
68
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java
68
69
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java
69
70
  - src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColorVisitor.java
@@ -85,6 +86,7 @@ files:
85
86
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java
86
87
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java
87
88
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java
89
+ - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java
88
90
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java
89
91
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java
90
92
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java
@@ -93,7 +95,7 @@ files:
93
95
  - src/test/resources/org/embulk/parser/poi_excel/test1.xls
94
96
  - src/test/resources/org/embulk/parser/poi_excel/test2.xlsx
95
97
  - classpath/commons-codec-1.9.jar
96
- - classpath/embulk-parser-poi_excel-0.1.6.jar
98
+ - classpath/embulk-parser-poi_excel-0.1.7.jar
97
99
  - classpath/embulk-standards-0.7.5.jar
98
100
  - classpath/poi-3.13.jar
99
101
  - classpath/poi-ooxml-3.13.jar