embulk-parser-poi_excel 0.1.5 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +80 -21
- data/build.gradle +21 -11
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -6
- data/gradlew +43 -35
- data/gradlew.bat +4 -10
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +25 -3
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +102 -11
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +132 -6
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +167 -47
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java +13 -1
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecord.java +52 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordColumn.java +80 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordRow.java +76 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java +49 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/RecordType.java +114 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +59 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/util/SearchMergedCell.java +71 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +0 -6
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +11 -11
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +52 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +87 -41
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java +1 -1
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +60 -12
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +14 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +3 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +30 -2
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionFinder.java +9 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionList.java +20 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java +55 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionNothing.java +12 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +27 -79
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java +69 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +14 -14
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +79 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java +90 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java +94 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_recordType.java +192 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +35 -1
- metadata +36 -17
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.parser.poi_excel;
|
2
2
|
|
3
|
+
import org.embulk.parser.poi_excel.bean.record.RecordType;
|
4
|
+
|
3
5
|
public enum PoiExcelColumnValueType {
|
4
6
|
/** cell value */
|
5
7
|
CELL_VALUE(true, true),
|
@@ -11,12 +13,32 @@ public enum PoiExcelColumnValueType {
|
|
11
13
|
CELL_FONT(true, false),
|
12
14
|
/** cell comment */
|
13
15
|
CELL_COMMENT(true, false),
|
16
|
+
/** cell type */
|
17
|
+
CELL_TYPE(true, false),
|
18
|
+
/** cell CachedFormulaResultType */
|
19
|
+
CELL_CACHED_TYPE(true, false),
|
14
20
|
/** sheet name */
|
15
21
|
SHEET_NAME(false, false),
|
16
22
|
/** row number (1 origin) */
|
17
|
-
ROW_NUMBER(false, false)
|
23
|
+
ROW_NUMBER(false, false) {
|
24
|
+
@Override
|
25
|
+
public boolean useCell(RecordType recordType) {
|
26
|
+
if (recordType == RecordType.COLUMN) {
|
27
|
+
return true;
|
28
|
+
}
|
29
|
+
return super.useCell(recordType);
|
30
|
+
}
|
31
|
+
},
|
18
32
|
/** column number (1 origin) */
|
19
|
-
COLUMN_NUMBER(true, false)
|
33
|
+
COLUMN_NUMBER(true, false) {
|
34
|
+
@Override
|
35
|
+
public boolean useCell(RecordType recordType) {
|
36
|
+
if (recordType == RecordType.ROW) {
|
37
|
+
return true;
|
38
|
+
}
|
39
|
+
return super.useCell(recordType);
|
40
|
+
}
|
41
|
+
},
|
20
42
|
/** constant */
|
21
43
|
CONSTANT(false, false);
|
22
44
|
|
@@ -28,7 +50,7 @@ public enum PoiExcelColumnValueType {
|
|
28
50
|
this.nextIndex = nextIndex;
|
29
51
|
}
|
30
52
|
|
31
|
-
public boolean useCell() {
|
53
|
+
public boolean useCell(RecordType recordType) {
|
32
54
|
return useCell;
|
33
55
|
}
|
34
56
|
|
@@ -2,12 +2,14 @@ package org.embulk.parser.poi_excel;
|
|
2
2
|
|
3
3
|
import java.io.IOException;
|
4
4
|
import java.util.ArrayList;
|
5
|
+
import java.util.LinkedHashSet;
|
5
6
|
import java.util.List;
|
6
7
|
import java.util.Map;
|
8
|
+
import java.util.Set;
|
9
|
+
import java.util.regex.Pattern;
|
7
10
|
|
8
11
|
import org.apache.poi.EncryptedDocumentException;
|
9
12
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
10
|
-
import org.apache.poi.ss.usermodel.Row;
|
11
13
|
import org.apache.poi.ss.usermodel.Sheet;
|
12
14
|
import org.apache.poi.ss.usermodel.Workbook;
|
13
15
|
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
@@ -17,6 +19,8 @@ import org.embulk.config.ConfigException;
|
|
17
19
|
import org.embulk.config.ConfigSource;
|
18
20
|
import org.embulk.config.Task;
|
19
21
|
import org.embulk.config.TaskSource;
|
22
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelSheetBean;
|
23
|
+
import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord;
|
20
24
|
import org.embulk.parser.poi_excel.visitor.PoiExcelColumnVisitor;
|
21
25
|
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorFactory;
|
22
26
|
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
|
@@ -66,6 +70,10 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
66
70
|
|
67
71
|
public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask {
|
68
72
|
|
73
|
+
@Config("record_type")
|
74
|
+
@ConfigDefault("null")
|
75
|
+
public Optional<String> getRecordType();
|
76
|
+
|
69
77
|
@Config("skip_header_lines")
|
70
78
|
@ConfigDefault("null")
|
71
79
|
public Optional<Integer> getSkipHeaderLines();
|
@@ -88,11 +96,30 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
88
96
|
@ConfigDefault("null")
|
89
97
|
public Optional<String> getValueType();
|
90
98
|
|
91
|
-
//
|
99
|
+
// same as cell_column
|
92
100
|
@Config("column_number")
|
93
101
|
@ConfigDefault("null")
|
94
102
|
public Optional<String> getColumnNumber();
|
95
103
|
|
104
|
+
public static final String CELL_COLUMN = "cell_column";
|
105
|
+
|
106
|
+
// A,B,... or number(1 origin)
|
107
|
+
@Config(CELL_COLUMN)
|
108
|
+
@ConfigDefault("null")
|
109
|
+
public Optional<String> getCellColumn();
|
110
|
+
|
111
|
+
public static final String CELL_ROW = "cell_row";
|
112
|
+
|
113
|
+
// number(1 origin)
|
114
|
+
@Config(CELL_ROW)
|
115
|
+
@ConfigDefault("null")
|
116
|
+
public Optional<String> getCellRow();
|
117
|
+
|
118
|
+
// A1,B2,... or Sheet1!A1
|
119
|
+
@Config("cell_address")
|
120
|
+
@ConfigDefault("null")
|
121
|
+
public Optional<String> getCellAddress();
|
122
|
+
|
96
123
|
// use when value_type=cell_style, cell_font, ...
|
97
124
|
@Config("attribute_name")
|
98
125
|
@ConfigDefault("null")
|
@@ -100,11 +127,19 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
100
127
|
}
|
101
128
|
|
102
129
|
public interface ColumnCommonOptionTask extends Task {
|
130
|
+
// format of numeric(double) to string
|
131
|
+
@Config("numeric_format")
|
132
|
+
@ConfigDefault("null")
|
133
|
+
public Optional<String> getNumericFormat();
|
103
134
|
|
104
135
|
// search merged cell if cellType=BLANK
|
105
136
|
@Config("search_merged_cell")
|
106
137
|
@ConfigDefault("null")
|
107
|
-
public Optional<
|
138
|
+
public Optional<String> getSearchMergedCell();
|
139
|
+
|
140
|
+
@Config("formula_handling")
|
141
|
+
@ConfigDefault("null")
|
142
|
+
public Optional<String> getFormulaHandling();
|
108
143
|
|
109
144
|
@Config("formula_replace")
|
110
145
|
@ConfigDefault("null")
|
@@ -166,9 +201,60 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
166
201
|
throw new RuntimeException(e);
|
167
202
|
}
|
168
203
|
|
169
|
-
|
204
|
+
List<String> list = resolveSheetName(workbook, sheetNames);
|
205
|
+
if (log.isDebugEnabled()) {
|
206
|
+
log.debug("resolved sheet names={}", list);
|
207
|
+
}
|
208
|
+
run(task, schema, workbook, list, output);
|
209
|
+
}
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
private List<String> resolveSheetName(Workbook workbook, List<String> sheetNames) {
|
214
|
+
Set<String> set = new LinkedHashSet<>();
|
215
|
+
for (String s : sheetNames) {
|
216
|
+
if (s.contains("*") || s.contains("?")) {
|
217
|
+
int length = s.length();
|
218
|
+
StringBuilder sb = new StringBuilder(length * 2);
|
219
|
+
StringBuilder buf = new StringBuilder(32);
|
220
|
+
for (int i = 0; i < length;) {
|
221
|
+
int c = s.codePointAt(i);
|
222
|
+
switch (c) {
|
223
|
+
case '*':
|
224
|
+
if (buf.length() > 0) {
|
225
|
+
sb.append(Pattern.quote(buf.toString()));
|
226
|
+
buf.setLength(0);
|
227
|
+
}
|
228
|
+
sb.append(".*");
|
229
|
+
break;
|
230
|
+
case '?':
|
231
|
+
if (buf.length() > 0) {
|
232
|
+
sb.append(Pattern.quote(buf.toString()));
|
233
|
+
buf.setLength(0);
|
234
|
+
}
|
235
|
+
sb.append(".");
|
236
|
+
break;
|
237
|
+
default:
|
238
|
+
buf.appendCodePoint(c);
|
239
|
+
break;
|
240
|
+
}
|
241
|
+
i += Character.charCount(c);
|
242
|
+
}
|
243
|
+
if (buf.length() > 0) {
|
244
|
+
sb.append(Pattern.quote(buf.toString()));
|
245
|
+
}
|
246
|
+
String regex = sb.toString();
|
247
|
+
for (Sheet sheet : workbook) {
|
248
|
+
String name = sheet.getSheetName();
|
249
|
+
if (name.matches(regex)) {
|
250
|
+
set.add(name);
|
251
|
+
}
|
252
|
+
}
|
253
|
+
} else {
|
254
|
+
set.add(s);
|
170
255
|
}
|
171
256
|
}
|
257
|
+
return new ArrayList<>(set);
|
172
258
|
}
|
173
259
|
|
174
260
|
protected void run(PluginTask task, Schema schema, Workbook workbook, List<String> sheetNames, PageOutput output) {
|
@@ -189,22 +275,27 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
189
275
|
log.info("sheet={}", sheetName);
|
190
276
|
PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, sheet, pageBuilder);
|
191
277
|
PoiExcelColumnVisitor visitor = factory.getPoiExcelColumnVisitor();
|
192
|
-
|
278
|
+
PoiExcelSheetBean sheetBean = factory.getVisitorValue().getSheetBean();
|
279
|
+
final int skipHeaderLines = sheetBean.getSkipHeaderLines();
|
280
|
+
|
281
|
+
PoiExcelRecord record = sheetBean.getRecordType().newPoiExcelRecord();
|
282
|
+
record.initialize(sheet, skipHeaderLines);
|
283
|
+
visitor.setRecord(record);
|
193
284
|
|
194
285
|
int count = 0;
|
195
|
-
for (
|
196
|
-
|
197
|
-
continue;
|
198
|
-
}
|
286
|
+
for (; record.exists(); record.moveNext()) {
|
287
|
+
record.logStart();
|
199
288
|
|
200
|
-
|
201
|
-
schema.visitColumns(visitor);
|
289
|
+
schema.visitColumns(visitor); // use record
|
202
290
|
pageBuilder.addRecord();
|
203
291
|
|
204
292
|
if (++count >= flushCount) {
|
293
|
+
log.trace("flush");
|
205
294
|
pageBuilder.flush();
|
206
295
|
count = 0;
|
207
296
|
}
|
297
|
+
|
298
|
+
record.logEnd();
|
208
299
|
}
|
209
300
|
pageBuilder.flush();
|
210
301
|
}
|
@@ -5,12 +5,16 @@ import java.util.ArrayList;
|
|
5
5
|
import java.util.Collections;
|
6
6
|
import java.util.List;
|
7
7
|
|
8
|
+
import org.apache.poi.ss.util.CellReference;
|
8
9
|
import org.embulk.config.ConfigException;
|
9
10
|
import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
|
10
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnCommonOptionTask;
|
11
12
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
|
12
13
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.FormulaReplaceTask;
|
13
14
|
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.ErrorStrategy.Strategy;
|
15
|
+
import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress;
|
16
|
+
import org.embulk.parser.poi_excel.bean.util.SearchMergedCell;
|
17
|
+
import org.embulk.parser.poi_excel.visitor.util.MergedRegionFinder;
|
14
18
|
import org.embulk.spi.Column;
|
15
19
|
|
16
20
|
import com.google.common.base.Optional;
|
@@ -98,6 +102,12 @@ public class PoiExcelColumnBean {
|
|
98
102
|
}
|
99
103
|
|
100
104
|
public Optional<String> getColumnNumber() {
|
105
|
+
for (ColumnOptionTask task : columnTaskList) {
|
106
|
+
Optional<String> option = task.getCellColumn();
|
107
|
+
if (option.isPresent()) {
|
108
|
+
return option;
|
109
|
+
}
|
110
|
+
}
|
101
111
|
for (ColumnOptionTask task : columnTaskList) {
|
102
112
|
Optional<String> option = task.getColumnNumber();
|
103
113
|
if (option.isPresent()) {
|
@@ -107,6 +117,40 @@ public class PoiExcelColumnBean {
|
|
107
117
|
return Optional.absent();
|
108
118
|
}
|
109
119
|
|
120
|
+
public Optional<String> getRowNumber() {
|
121
|
+
for (ColumnOptionTask task : columnTaskList) {
|
122
|
+
Optional<String> option = task.getCellRow();
|
123
|
+
if (option.isPresent()) {
|
124
|
+
return option;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
return Optional.absent();
|
128
|
+
}
|
129
|
+
|
130
|
+
private Optional<PoiExcelCellAddress> cellAddress;
|
131
|
+
|
132
|
+
public PoiExcelCellAddress getCellAddress() {
|
133
|
+
if (cellAddress == null) {
|
134
|
+
this.cellAddress = initializeCellAddress();
|
135
|
+
}
|
136
|
+
return cellAddress.orNull();
|
137
|
+
}
|
138
|
+
|
139
|
+
protected Optional<PoiExcelCellAddress> initializeCellAddress() {
|
140
|
+
for (ColumnOptionTask task : columnTaskList) {
|
141
|
+
Optional<String> option = task.getCellAddress();
|
142
|
+
if (option.isPresent()) {
|
143
|
+
CellReference ref = new CellReference(option.get());
|
144
|
+
return Optional.of(new PoiExcelCellAddress(ref));
|
145
|
+
}
|
146
|
+
}
|
147
|
+
return Optional.absent();
|
148
|
+
}
|
149
|
+
|
150
|
+
public void setCellAddress(CellReference ref) {
|
151
|
+
this.cellAddress = Optional.of(new PoiExcelCellAddress(ref));
|
152
|
+
}
|
153
|
+
|
110
154
|
protected abstract class CacheValue<T> {
|
111
155
|
private T value;
|
112
156
|
|
@@ -231,23 +275,105 @@ public class PoiExcelColumnBean {
|
|
231
275
|
return attributeName.get();
|
232
276
|
}
|
233
277
|
|
234
|
-
private CacheValue<
|
278
|
+
private CacheValue<String> numericFormat = new CacheValue<String>() {
|
279
|
+
|
280
|
+
@Override
|
281
|
+
protected Optional<String> getTaskValue(ColumnCommonOptionTask task) {
|
282
|
+
return task.getNumericFormat();
|
283
|
+
}
|
284
|
+
|
285
|
+
@Override
|
286
|
+
protected String getDefaultValue() {
|
287
|
+
return "";
|
288
|
+
}
|
289
|
+
};
|
290
|
+
|
291
|
+
public String getNumericFormat() {
|
292
|
+
return numericFormat.get();
|
293
|
+
}
|
294
|
+
|
295
|
+
private CacheValue<SearchMergedCell> searchMergedCell = new CacheValue<SearchMergedCell>() {
|
235
296
|
|
236
297
|
@Override
|
237
|
-
protected Optional<
|
238
|
-
|
298
|
+
protected Optional<SearchMergedCell> getTaskValue(ColumnCommonOptionTask task) {
|
299
|
+
Optional<String> option = task.getSearchMergedCell();
|
300
|
+
String value = option.or("null").trim();
|
301
|
+
switch (value.toLowerCase()) {
|
302
|
+
case "null":
|
303
|
+
return Optional.absent();
|
304
|
+
case "true": // compatibility ver 0.1.7
|
305
|
+
return Optional.of(getDefaultValue());
|
306
|
+
case "false": // compatibility ver 0.1.7
|
307
|
+
return Optional.of(SearchMergedCell.NONE);
|
308
|
+
default:
|
309
|
+
break;
|
310
|
+
}
|
311
|
+
try {
|
312
|
+
return Optional.of(SearchMergedCell.valueOf(value.toUpperCase()));
|
313
|
+
} catch (Exception e) {
|
314
|
+
List<String> list = new ArrayList<>();
|
315
|
+
for (SearchMergedCell s : SearchMergedCell.values()) {
|
316
|
+
list.add(s.name().toLowerCase());
|
317
|
+
}
|
318
|
+
throw new ConfigException(MessageFormat.format("illegal search_merged_cell={0}. expected={1}", value,
|
319
|
+
list), e);
|
320
|
+
}
|
239
321
|
}
|
240
322
|
|
241
323
|
@Override
|
242
|
-
protected
|
243
|
-
return
|
324
|
+
protected SearchMergedCell getDefaultValue() {
|
325
|
+
return SearchMergedCell.HASH_SEARCH;
|
244
326
|
}
|
245
327
|
};
|
246
328
|
|
247
|
-
public
|
329
|
+
public SearchMergedCell getSearchMergedCell() {
|
248
330
|
return searchMergedCell.get();
|
249
331
|
}
|
250
332
|
|
333
|
+
private MergedRegionFinder mergedRegionFinder;
|
334
|
+
|
335
|
+
public MergedRegionFinder getMergedRegionFinder() {
|
336
|
+
if (mergedRegionFinder == null) {
|
337
|
+
this.mergedRegionFinder = getSearchMergedCell().getMergedRegionFinder();
|
338
|
+
}
|
339
|
+
return mergedRegionFinder;
|
340
|
+
}
|
341
|
+
|
342
|
+
public enum FormulaHandling {
|
343
|
+
EVALUATE, CASHED_VALUE
|
344
|
+
}
|
345
|
+
|
346
|
+
private CacheValue<FormulaHandling> formulaHandling = new CacheValue<FormulaHandling>() {
|
347
|
+
|
348
|
+
@Override
|
349
|
+
protected Optional<FormulaHandling> getTaskValue(ColumnCommonOptionTask task) {
|
350
|
+
Optional<String> option = task.getFormulaHandling();
|
351
|
+
String value = option.or("null");
|
352
|
+
if ("null".equalsIgnoreCase(value)) {
|
353
|
+
return Optional.absent();
|
354
|
+
}
|
355
|
+
try {
|
356
|
+
return Optional.of(FormulaHandling.valueOf(value.trim().toUpperCase()));
|
357
|
+
} catch (Exception e) {
|
358
|
+
List<String> list = new ArrayList<>();
|
359
|
+
for (FormulaHandling s : FormulaHandling.values()) {
|
360
|
+
list.add(s.name().toLowerCase());
|
361
|
+
}
|
362
|
+
throw new ConfigException(MessageFormat.format("illegal formula_handling={0}. expected={1}", value,
|
363
|
+
list), e);
|
364
|
+
}
|
365
|
+
}
|
366
|
+
|
367
|
+
@Override
|
368
|
+
protected FormulaHandling getDefaultValue() {
|
369
|
+
return FormulaHandling.EVALUATE;
|
370
|
+
}
|
371
|
+
};
|
372
|
+
|
373
|
+
public FormulaHandling getFormulaHandling() {
|
374
|
+
return formulaHandling.get();
|
375
|
+
}
|
376
|
+
|
251
377
|
private CacheValue<List<FormulaReplaceTask>> formulaReplace = new CacheValue<List<FormulaReplaceTask>>() {
|
252
378
|
|
253
379
|
@Override
|
@@ -7,7 +7,10 @@ import java.util.Map;
|
|
7
7
|
|
8
8
|
import org.apache.poi.ss.util.CellReference;
|
9
9
|
import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
|
10
|
+
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
|
10
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
|
12
|
+
import org.embulk.parser.poi_excel.bean.record.RecordType;
|
13
|
+
import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress;
|
11
14
|
import org.embulk.spi.Column;
|
12
15
|
import org.embulk.spi.Exec;
|
13
16
|
import org.embulk.spi.Schema;
|
@@ -18,68 +21,105 @@ import com.google.common.base.Optional;
|
|
18
21
|
public class PoiExcelColumnIndex {
|
19
22
|
private final Logger log = Exec.getLogger(getClass());
|
20
23
|
|
24
|
+
protected final RecordType recordType;
|
21
25
|
protected final Map<String, Integer> indexMap = new LinkedHashMap<>();
|
22
26
|
|
27
|
+
public PoiExcelColumnIndex(PoiExcelSheetBean sheetBean) {
|
28
|
+
this.recordType = sheetBean.getRecordType();
|
29
|
+
}
|
30
|
+
|
23
31
|
public void initializeColumnIndex(PluginTask task, List<PoiExcelColumnBean> beanList) {
|
32
|
+
log.info("record_type={}", recordType);
|
33
|
+
|
24
34
|
int index = -1;
|
25
35
|
indexMap.clear();
|
26
36
|
|
27
37
|
Schema schema = task.getColumns().toSchema();
|
28
38
|
for (Column column : schema.getColumns()) {
|
29
39
|
PoiExcelColumnBean bean = beanList.get(column.getIndex());
|
30
|
-
|
40
|
+
initializeCellAddress(column, bean);
|
31
41
|
|
32
|
-
|
42
|
+
PoiExcelColumnValueType valueType = bean.getValueType();
|
43
|
+
if (valueType.useCell(recordType)) {
|
33
44
|
index = resolveColumnIndex(column, bean, index, valueType);
|
34
45
|
if (index < 0) {
|
35
46
|
index = 0;
|
36
47
|
}
|
37
48
|
bean.setColumnIndex(index);
|
38
49
|
indexMap.put(column.getName(), index);
|
39
|
-
if (log.isInfoEnabled()) {
|
40
|
-
String c = CellReference.convertNumToColString(index);
|
41
|
-
switch (valueType) {
|
42
|
-
default:
|
43
|
-
String suffix = bean.getValueTypeSuffix();
|
44
|
-
if (suffix != null) {
|
45
|
-
log.info("column.name={} <- cell_column={}, value_type={}, value=[{}]", column.getName(),
|
46
|
-
c, valueType, suffix);
|
47
|
-
} else {
|
48
|
-
log.info("column.name={} <- cell_column={}, value_type={}, value={}", column.getName(), c,
|
49
|
-
valueType, suffix);
|
50
|
-
}
|
51
|
-
break;
|
52
|
-
case CELL_VALUE:
|
53
|
-
case CELL_FORMULA:
|
54
|
-
case COLUMN_NUMBER:
|
55
|
-
log.info("column.name={} <- cell_column={}, value_type={}", column.getName(), c, valueType);
|
56
|
-
break;
|
57
|
-
}
|
58
|
-
}
|
59
|
-
} else {
|
60
|
-
if (log.isInfoEnabled()) {
|
61
|
-
switch (valueType) {
|
62
|
-
default:
|
63
|
-
String suffix = bean.getValueTypeSuffix();
|
64
|
-
if (suffix != null) {
|
65
|
-
log.info("column.name={} <- value_type={}, value=[{}]", column.getName(), valueType, suffix);
|
66
|
-
} else {
|
67
|
-
log.info("column.name={} <- value_type={}, value={}", column.getName(), valueType, suffix);
|
68
|
-
}
|
69
|
-
break;
|
70
|
-
case SHEET_NAME:
|
71
|
-
case ROW_NUMBER:
|
72
|
-
log.info("column.name={} <- value_type={}", column.getName(), valueType);
|
73
|
-
break;
|
74
|
-
}
|
75
|
-
}
|
76
50
|
}
|
51
|
+
|
52
|
+
initializeCellAddress2(column, bean, index);
|
53
|
+
|
54
|
+
if (log.isInfoEnabled()) {
|
55
|
+
logColumn(column, bean, valueType, index);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
protected void initializeCellAddress(Column column, PoiExcelColumnBean bean) {
|
61
|
+
if (bean.getCellAddress() != null) {
|
62
|
+
return;
|
63
|
+
}
|
64
|
+
|
65
|
+
Optional<String> rowOption = bean.getRowNumber();
|
66
|
+
Optional<String> colOption = bean.getColumnNumber();
|
67
|
+
if (rowOption.isPresent() && colOption.isPresent()) {
|
68
|
+
String rowNumber = rowOption.get();
|
69
|
+
String colNumber = colOption.get();
|
70
|
+
initializeCellAddress(column, bean, rowNumber, colNumber);
|
71
|
+
return;
|
72
|
+
}
|
73
|
+
|
74
|
+
if (recordType == RecordType.SHEET) {
|
75
|
+
String rowNumber = rowOption.or("1");
|
76
|
+
String colNumber = colOption.or("A");
|
77
|
+
initializeCellAddress(column, bean, rowNumber, colNumber);
|
78
|
+
return;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
protected void initializeCellAddress(Column column, PoiExcelColumnBean bean, String rowNumber, String columnNumber) {
|
83
|
+
int columnIndex = convertColumnIndex(column, OPTION_NAME_CELL_COLUMN, columnNumber);
|
84
|
+
int rowIndex = convertColumnIndex(column, OPTION_NAME_CELL_ROW, rowNumber);
|
85
|
+
CellReference ref = new CellReference(rowIndex, columnIndex);
|
86
|
+
bean.setCellAddress(ref);
|
87
|
+
}
|
88
|
+
|
89
|
+
protected void initializeCellAddress2(Column column, PoiExcelColumnBean bean, int index) {
|
90
|
+
if (bean.getCellAddress() != null) {
|
91
|
+
return;
|
92
|
+
}
|
93
|
+
|
94
|
+
Optional<String> recordOption = recordType.getRecordOption(bean);
|
95
|
+
if (recordOption.isPresent()) {
|
96
|
+
int rowIndex, columnIndex;
|
97
|
+
switch (recordType) {
|
98
|
+
case ROW:
|
99
|
+
rowIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
|
100
|
+
columnIndex = (index >= 0) ? index : 0;
|
101
|
+
break;
|
102
|
+
case COLUMN:
|
103
|
+
rowIndex = (index >= 0) ? index : 0;
|
104
|
+
columnIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
|
105
|
+
break;
|
106
|
+
default:
|
107
|
+
throw new IllegalStateException();
|
108
|
+
}
|
109
|
+
CellReference ref = new CellReference(rowIndex, columnIndex);
|
110
|
+
bean.setCellAddress(ref);
|
77
111
|
}
|
78
112
|
}
|
79
113
|
|
80
114
|
protected int resolveColumnIndex(Column column, PoiExcelColumnBean bean, int index,
|
81
115
|
PoiExcelColumnValueType valueType) {
|
82
|
-
Optional<String> numberOption =
|
116
|
+
Optional<String> numberOption = recordType.getNumberOption(bean);
|
117
|
+
PoiExcelCellAddress cellAddress = bean.getCellAddress();
|
118
|
+
|
119
|
+
if (cellAddress != null) {
|
120
|
+
return index;
|
121
|
+
}
|
122
|
+
|
83
123
|
if (numberOption.isPresent()) {
|
84
124
|
String columnNumber = numberOption.get();
|
85
125
|
if (columnNumber.length() >= 1) {
|
@@ -96,7 +136,7 @@ public class PoiExcelColumnIndex {
|
|
96
136
|
break;
|
97
137
|
}
|
98
138
|
}
|
99
|
-
return convertColumnIndex(column, columnNumber);
|
139
|
+
return convertColumnIndex(column, recordType.getNumberOptionName(), columnNumber);
|
100
140
|
} else {
|
101
141
|
if (valueType.nextIndex()) {
|
102
142
|
index++;
|
@@ -167,11 +207,12 @@ public class PoiExcelColumnIndex {
|
|
167
207
|
|
168
208
|
protected void checkIndex(Column column, int index) {
|
169
209
|
if (index < 0) {
|
170
|
-
throw new RuntimeException(MessageFormat.format("
|
210
|
+
throw new RuntimeException(MessageFormat.format("{0} out of range at {1}",
|
211
|
+
recordType.getNumberOptionName(), column));
|
171
212
|
}
|
172
213
|
}
|
173
214
|
|
174
|
-
protected int convertColumnIndex(Column column, String columnNumber) {
|
215
|
+
protected int convertColumnIndex(Column column, String numberOptionName, String columnNumber) {
|
175
216
|
int index;
|
176
217
|
try {
|
177
218
|
char c = columnNumber.charAt(0);
|
@@ -181,13 +222,92 @@ public class PoiExcelColumnIndex {
|
|
181
222
|
index = CellReference.convertColStringToIndex(columnNumber);
|
182
223
|
}
|
183
224
|
} catch (Exception e) {
|
184
|
-
throw new RuntimeException(MessageFormat.format("illegal
|
185
|
-
column), e);
|
225
|
+
throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
|
226
|
+
columnNumber, column), e);
|
186
227
|
}
|
187
228
|
if (index < 0) {
|
188
|
-
throw new RuntimeException(MessageFormat.format("illegal
|
189
|
-
column));
|
229
|
+
throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
|
230
|
+
columnNumber, column));
|
190
231
|
}
|
191
232
|
return index;
|
192
233
|
}
|
234
|
+
|
235
|
+
private static final String OPTION_NAME_CELL_COLUMN = ColumnOptionTask.CELL_COLUMN;
|
236
|
+
private static final String OPTION_NAME_CELL_ROW = ColumnOptionTask.CELL_ROW;
|
237
|
+
|
238
|
+
protected void logColumn(Column column, PoiExcelColumnBean bean, PoiExcelColumnValueType valueType, int index) {
|
239
|
+
PoiExcelCellAddress cellAddress = bean.getCellAddress();
|
240
|
+
|
241
|
+
String cname, cvalue;
|
242
|
+
if (cellAddress != null) {
|
243
|
+
cname = "cell_address";
|
244
|
+
cvalue = cellAddress.getString();
|
245
|
+
} else {
|
246
|
+
switch (recordType) {
|
247
|
+
default:
|
248
|
+
cname = OPTION_NAME_CELL_COLUMN;
|
249
|
+
cvalue = CellReference.convertNumToColString(index);
|
250
|
+
break;
|
251
|
+
case COLUMN:
|
252
|
+
cname = OPTION_NAME_CELL_ROW;
|
253
|
+
cvalue = Integer.toString(index + 1);
|
254
|
+
break;
|
255
|
+
case SHEET:
|
256
|
+
cname = "sheet";
|
257
|
+
cvalue = null;
|
258
|
+
break;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
switch (valueType) {
|
263
|
+
default:
|
264
|
+
case CELL_VALUE:
|
265
|
+
case CELL_FORMULA:
|
266
|
+
case CELL_TYPE:
|
267
|
+
case CELL_CACHED_TYPE:
|
268
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
269
|
+
break;
|
270
|
+
case CELL_STYLE:
|
271
|
+
case CELL_FONT:
|
272
|
+
case CELL_COMMENT:
|
273
|
+
String suffix = bean.getValueTypeSuffix();
|
274
|
+
if (suffix != null) {
|
275
|
+
log.info("column.name={} <- {}={}, value={}[{}]", column.getName(), cname, cvalue, valueType, suffix);
|
276
|
+
} else {
|
277
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
278
|
+
}
|
279
|
+
break;
|
280
|
+
|
281
|
+
case SHEET_NAME:
|
282
|
+
if (cellAddress != null && cellAddress.getSheetName() != null) {
|
283
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
284
|
+
} else {
|
285
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
286
|
+
}
|
287
|
+
break;
|
288
|
+
case ROW_NUMBER:
|
289
|
+
if (cellAddress != null || cname.equals(OPTION_NAME_CELL_ROW)) {
|
290
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
291
|
+
} else {
|
292
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
293
|
+
}
|
294
|
+
break;
|
295
|
+
case COLUMN_NUMBER:
|
296
|
+
if (cellAddress != null || cname.equals(OPTION_NAME_CELL_COLUMN)) {
|
297
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
298
|
+
} else {
|
299
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
300
|
+
}
|
301
|
+
break;
|
302
|
+
|
303
|
+
case CONSTANT:
|
304
|
+
String value = bean.getValueTypeSuffix();
|
305
|
+
if (value != null) {
|
306
|
+
log.info("column.name={} <- value={}[{}]", column.getName(), valueType, value);
|
307
|
+
} else {
|
308
|
+
log.info("column.name={} <- value={}({})", column.getName(), valueType, value);
|
309
|
+
}
|
310
|
+
break;
|
311
|
+
}
|
312
|
+
}
|
193
313
|
}
|