embulk-parser-poi_excel 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +21 -21
- data/README.md +247 -233
- data/build.gradle +92 -86
- data/classpath/{embulk-parser-poi_excel-0.1.11.jar → embulk-parser-poi_excel-0.1.13.jar} +0 -0
- data/gradlew +172 -172
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +23 -3
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +38 -23
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +23 -1
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +114 -28
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java +13 -1
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecord.java +52 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordColumn.java +80 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordRow.java +76 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java +55 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/RecordType.java +114 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +18 -9
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +8 -1
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +26 -26
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorValue.java +28 -1
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +9 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_recordType.java +192 -0
- metadata +9 -3
@@ -10,7 +10,6 @@ import java.util.regex.Pattern;
|
|
10
10
|
|
11
11
|
import org.apache.poi.EncryptedDocumentException;
|
12
12
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
13
|
-
import org.apache.poi.ss.usermodel.Row;
|
14
13
|
import org.apache.poi.ss.usermodel.Sheet;
|
15
14
|
import org.apache.poi.ss.usermodel.Workbook;
|
16
15
|
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
@@ -20,6 +19,8 @@ import org.embulk.config.ConfigException;
|
|
20
19
|
import org.embulk.config.ConfigSource;
|
21
20
|
import org.embulk.config.Task;
|
22
21
|
import org.embulk.config.TaskSource;
|
22
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelSheetBean;
|
23
|
+
import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord;
|
23
24
|
import org.embulk.parser.poi_excel.visitor.PoiExcelColumnVisitor;
|
24
25
|
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorFactory;
|
25
26
|
import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
|
@@ -69,6 +70,10 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
69
70
|
|
70
71
|
public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask {
|
71
72
|
|
73
|
+
@Config("record_type")
|
74
|
+
@ConfigDefault("null")
|
75
|
+
public Optional<String> getRecordType();
|
76
|
+
|
72
77
|
@Config("skip_header_lines")
|
73
78
|
@ConfigDefault("null")
|
74
79
|
public Optional<Integer> getSkipHeaderLines();
|
@@ -91,11 +96,25 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
91
96
|
@ConfigDefault("null")
|
92
97
|
public Optional<String> getValueType();
|
93
98
|
|
94
|
-
//
|
99
|
+
// same as cell_column
|
95
100
|
@Config("column_number")
|
96
101
|
@ConfigDefault("null")
|
97
102
|
public Optional<String> getColumnNumber();
|
98
103
|
|
104
|
+
public static final String CELL_COLUMN = "cell_column";
|
105
|
+
|
106
|
+
// A,B,... or number(1 origin)
|
107
|
+
@Config(CELL_COLUMN)
|
108
|
+
@ConfigDefault("null")
|
109
|
+
public Optional<String> getCellColumn();
|
110
|
+
|
111
|
+
public static final String CELL_ROW = "cell_row";
|
112
|
+
|
113
|
+
// number(1 origin)
|
114
|
+
@Config(CELL_ROW)
|
115
|
+
@ConfigDefault("null")
|
116
|
+
public Optional<String> getCellRow();
|
117
|
+
|
99
118
|
// A1,B2,... or Sheet1!A1
|
100
119
|
@Config("cell_address")
|
101
120
|
@ConfigDefault("null")
|
@@ -186,7 +205,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
186
205
|
if (log.isDebugEnabled()) {
|
187
206
|
log.debug("resolved sheet names={}", list);
|
188
207
|
}
|
189
|
-
run(task, schema, workbook, list, output);
|
208
|
+
run(task, schema, input, workbook, list, output);
|
190
209
|
}
|
191
210
|
}
|
192
211
|
}
|
@@ -238,7 +257,8 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
238
257
|
return new ArrayList<>(set);
|
239
258
|
}
|
240
259
|
|
241
|
-
protected void run(PluginTask task, Schema schema, Workbook workbook, List<String> sheetNames,
|
260
|
+
protected void run(PluginTask task, Schema schema, FileInput input, Workbook workbook, List<String> sheetNames,
|
261
|
+
PageOutput output) {
|
242
262
|
final int flushCount = task.getFlushCount();
|
243
263
|
|
244
264
|
try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
@@ -254,23 +274,20 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
254
274
|
}
|
255
275
|
|
256
276
|
log.info("sheet={}", sheetName);
|
257
|
-
PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, sheet, pageBuilder);
|
277
|
+
PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, input, sheet, pageBuilder);
|
258
278
|
PoiExcelColumnVisitor visitor = factory.getPoiExcelColumnVisitor();
|
259
|
-
|
279
|
+
PoiExcelSheetBean sheetBean = factory.getVisitorValue().getSheetBean();
|
280
|
+
final int skipHeaderLines = sheetBean.getSkipHeaderLines();
|
281
|
+
|
282
|
+
PoiExcelRecord record = sheetBean.getRecordType().newPoiExcelRecord();
|
283
|
+
record.initialize(sheet, skipHeaderLines);
|
284
|
+
visitor.setRecord(record);
|
260
285
|
|
261
286
|
int count = 0;
|
262
|
-
for (
|
263
|
-
|
264
|
-
if (rowIndex < skipHeaderLines) {
|
265
|
-
log.debug("row({}) skipped", rowIndex);
|
266
|
-
continue;
|
267
|
-
}
|
268
|
-
if (log.isDebugEnabled()) {
|
269
|
-
log.debug("row({}) start", rowIndex);
|
270
|
-
}
|
287
|
+
for (; record.exists(); record.moveNext()) {
|
288
|
+
record.logStart();
|
271
289
|
|
272
|
-
|
273
|
-
schema.visitColumns(visitor);
|
290
|
+
schema.visitColumns(visitor); // use record
|
274
291
|
pageBuilder.addRecord();
|
275
292
|
|
276
293
|
if (++count >= flushCount) {
|
@@ -279,9 +296,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
279
296
|
count = 0;
|
280
297
|
}
|
281
298
|
|
282
|
-
|
283
|
-
log.debug("row({}) end", rowIndex);
|
284
|
-
}
|
299
|
+
record.logEnd();
|
285
300
|
}
|
286
301
|
pageBuilder.flush();
|
287
302
|
}
|
@@ -289,9 +304,9 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
289
304
|
}
|
290
305
|
}
|
291
306
|
|
292
|
-
protected PoiExcelVisitorFactory newPoiExcelVisitorFactory(PluginTask task, Schema schema,
|
293
|
-
PageBuilder pageBuilder) {
|
294
|
-
PoiExcelVisitorValue visitorValue = new PoiExcelVisitorValue(task, schema, sheet, pageBuilder);
|
307
|
+
protected PoiExcelVisitorFactory newPoiExcelVisitorFactory(PluginTask task, Schema schema, FileInput input,
|
308
|
+
Sheet sheet, PageBuilder pageBuilder) {
|
309
|
+
PoiExcelVisitorValue visitorValue = new PoiExcelVisitorValue(task, schema, input, sheet, pageBuilder);
|
295
310
|
return new PoiExcelVisitorFactory(visitorValue);
|
296
311
|
}
|
297
312
|
}
|
@@ -5,6 +5,7 @@ import java.util.ArrayList;
|
|
5
5
|
import java.util.Collections;
|
6
6
|
import java.util.List;
|
7
7
|
|
8
|
+
import org.apache.poi.ss.util.CellReference;
|
8
9
|
import org.embulk.config.ConfigException;
|
9
10
|
import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
|
10
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnCommonOptionTask;
|
@@ -101,6 +102,12 @@ public class PoiExcelColumnBean {
|
|
101
102
|
}
|
102
103
|
|
103
104
|
public Optional<String> getColumnNumber() {
|
105
|
+
for (ColumnOptionTask task : columnTaskList) {
|
106
|
+
Optional<String> option = task.getCellColumn();
|
107
|
+
if (option.isPresent()) {
|
108
|
+
return option;
|
109
|
+
}
|
110
|
+
}
|
104
111
|
for (ColumnOptionTask task : columnTaskList) {
|
105
112
|
Optional<String> option = task.getColumnNumber();
|
106
113
|
if (option.isPresent()) {
|
@@ -110,6 +117,16 @@ public class PoiExcelColumnBean {
|
|
110
117
|
return Optional.absent();
|
111
118
|
}
|
112
119
|
|
120
|
+
public Optional<String> getRowNumber() {
|
121
|
+
for (ColumnOptionTask task : columnTaskList) {
|
122
|
+
Optional<String> option = task.getCellRow();
|
123
|
+
if (option.isPresent()) {
|
124
|
+
return option;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
return Optional.absent();
|
128
|
+
}
|
129
|
+
|
113
130
|
private Optional<PoiExcelCellAddress> cellAddress;
|
114
131
|
|
115
132
|
public PoiExcelCellAddress getCellAddress() {
|
@@ -123,12 +140,17 @@ public class PoiExcelColumnBean {
|
|
123
140
|
for (ColumnOptionTask task : columnTaskList) {
|
124
141
|
Optional<String> option = task.getCellAddress();
|
125
142
|
if (option.isPresent()) {
|
126
|
-
|
143
|
+
CellReference ref = new CellReference(option.get());
|
144
|
+
return Optional.of(new PoiExcelCellAddress(ref));
|
127
145
|
}
|
128
146
|
}
|
129
147
|
return Optional.absent();
|
130
148
|
}
|
131
149
|
|
150
|
+
public void setCellAddress(CellReference ref) {
|
151
|
+
this.cellAddress = Optional.of(new PoiExcelCellAddress(ref));
|
152
|
+
}
|
153
|
+
|
132
154
|
protected abstract class CacheValue<T> {
|
133
155
|
private T value;
|
134
156
|
|
@@ -7,7 +7,9 @@ import java.util.Map;
|
|
7
7
|
|
8
8
|
import org.apache.poi.ss.util.CellReference;
|
9
9
|
import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
|
10
|
+
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
|
10
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
|
12
|
+
import org.embulk.parser.poi_excel.bean.record.RecordType;
|
11
13
|
import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress;
|
12
14
|
import org.embulk.spi.Column;
|
13
15
|
import org.embulk.spi.Exec;
|
@@ -19,18 +21,26 @@ import com.google.common.base.Optional;
|
|
19
21
|
public class PoiExcelColumnIndex {
|
20
22
|
private final Logger log = Exec.getLogger(getClass());
|
21
23
|
|
24
|
+
protected final RecordType recordType;
|
22
25
|
protected final Map<String, Integer> indexMap = new LinkedHashMap<>();
|
23
26
|
|
27
|
+
public PoiExcelColumnIndex(PoiExcelSheetBean sheetBean) {
|
28
|
+
this.recordType = sheetBean.getRecordType();
|
29
|
+
}
|
30
|
+
|
24
31
|
public void initializeColumnIndex(PluginTask task, List<PoiExcelColumnBean> beanList) {
|
32
|
+
log.info("record_type={}", recordType);
|
33
|
+
|
25
34
|
int index = -1;
|
26
35
|
indexMap.clear();
|
27
36
|
|
28
37
|
Schema schema = task.getColumns().toSchema();
|
29
38
|
for (Column column : schema.getColumns()) {
|
30
39
|
PoiExcelColumnBean bean = beanList.get(column.getIndex());
|
31
|
-
|
40
|
+
initializeCellAddress(column, bean);
|
32
41
|
|
33
|
-
|
42
|
+
PoiExcelColumnValueType valueType = bean.getValueType();
|
43
|
+
if (valueType.useCell(recordType)) {
|
34
44
|
index = resolveColumnIndex(column, bean, index, valueType);
|
35
45
|
if (index < 0) {
|
36
46
|
index = 0;
|
@@ -39,21 +49,74 @@ public class PoiExcelColumnIndex {
|
|
39
49
|
indexMap.put(column.getName(), index);
|
40
50
|
}
|
41
51
|
|
52
|
+
initializeCellAddress2(column, bean, index);
|
53
|
+
|
42
54
|
if (log.isInfoEnabled()) {
|
43
55
|
logColumn(column, bean, valueType, index);
|
44
56
|
}
|
45
57
|
}
|
46
58
|
}
|
47
59
|
|
60
|
+
protected void initializeCellAddress(Column column, PoiExcelColumnBean bean) {
|
61
|
+
if (bean.getCellAddress() != null) {
|
62
|
+
return;
|
63
|
+
}
|
64
|
+
|
65
|
+
Optional<String> rowOption = bean.getRowNumber();
|
66
|
+
Optional<String> colOption = bean.getColumnNumber();
|
67
|
+
if (rowOption.isPresent() && colOption.isPresent()) {
|
68
|
+
String rowNumber = rowOption.get();
|
69
|
+
String colNumber = colOption.get();
|
70
|
+
initializeCellAddress(column, bean, rowNumber, colNumber);
|
71
|
+
return;
|
72
|
+
}
|
73
|
+
|
74
|
+
if (recordType == RecordType.SHEET) {
|
75
|
+
String rowNumber = rowOption.or("1");
|
76
|
+
String colNumber = colOption.or("A");
|
77
|
+
initializeCellAddress(column, bean, rowNumber, colNumber);
|
78
|
+
return;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
protected void initializeCellAddress(Column column, PoiExcelColumnBean bean, String rowNumber, String columnNumber) {
|
83
|
+
int columnIndex = convertColumnIndex(column, OPTION_NAME_CELL_COLUMN, columnNumber);
|
84
|
+
int rowIndex = convertColumnIndex(column, OPTION_NAME_CELL_ROW, rowNumber);
|
85
|
+
CellReference ref = new CellReference(rowIndex, columnIndex);
|
86
|
+
bean.setCellAddress(ref);
|
87
|
+
}
|
88
|
+
|
89
|
+
protected void initializeCellAddress2(Column column, PoiExcelColumnBean bean, int index) {
|
90
|
+
if (bean.getCellAddress() != null) {
|
91
|
+
return;
|
92
|
+
}
|
93
|
+
|
94
|
+
Optional<String> recordOption = recordType.getRecordOption(bean);
|
95
|
+
if (recordOption.isPresent()) {
|
96
|
+
int rowIndex, columnIndex;
|
97
|
+
switch (recordType) {
|
98
|
+
case ROW:
|
99
|
+
rowIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
|
100
|
+
columnIndex = (index >= 0) ? index : 0;
|
101
|
+
break;
|
102
|
+
case COLUMN:
|
103
|
+
rowIndex = (index >= 0) ? index : 0;
|
104
|
+
columnIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
|
105
|
+
break;
|
106
|
+
default:
|
107
|
+
throw new IllegalStateException();
|
108
|
+
}
|
109
|
+
CellReference ref = new CellReference(rowIndex, columnIndex);
|
110
|
+
bean.setCellAddress(ref);
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
48
114
|
protected int resolveColumnIndex(Column column, PoiExcelColumnBean bean, int index,
|
49
115
|
PoiExcelColumnValueType valueType) {
|
50
|
-
Optional<String> numberOption =
|
116
|
+
Optional<String> numberOption = recordType.getNumberOption(bean);
|
51
117
|
PoiExcelCellAddress cellAddress = bean.getCellAddress();
|
52
118
|
|
53
119
|
if (cellAddress != null) {
|
54
|
-
if (numberOption.isPresent()) {
|
55
|
-
throw new RuntimeException("only one of column_number, cell_address can be specified");
|
56
|
-
}
|
57
120
|
return index;
|
58
121
|
}
|
59
122
|
|
@@ -73,7 +136,7 @@ public class PoiExcelColumnIndex {
|
|
73
136
|
break;
|
74
137
|
}
|
75
138
|
}
|
76
|
-
return convertColumnIndex(column, columnNumber);
|
139
|
+
return convertColumnIndex(column, recordType.getNumberOptionName(), columnNumber);
|
77
140
|
} else {
|
78
141
|
if (valueType.nextIndex()) {
|
79
142
|
index++;
|
@@ -144,11 +207,12 @@ public class PoiExcelColumnIndex {
|
|
144
207
|
|
145
208
|
protected void checkIndex(Column column, int index) {
|
146
209
|
if (index < 0) {
|
147
|
-
throw new RuntimeException(MessageFormat.format("
|
210
|
+
throw new RuntimeException(MessageFormat.format("{0} out of range at {1}",
|
211
|
+
recordType.getNumberOptionName(), column));
|
148
212
|
}
|
149
213
|
}
|
150
214
|
|
151
|
-
protected int convertColumnIndex(Column column, String columnNumber) {
|
215
|
+
protected int convertColumnIndex(Column column, String numberOptionName, String columnNumber) {
|
152
216
|
int index;
|
153
217
|
try {
|
154
218
|
char c = columnNumber.charAt(0);
|
@@ -158,16 +222,19 @@ public class PoiExcelColumnIndex {
|
|
158
222
|
index = CellReference.convertColStringToIndex(columnNumber);
|
159
223
|
}
|
160
224
|
} catch (Exception e) {
|
161
|
-
throw new RuntimeException(MessageFormat.format("illegal
|
162
|
-
column), e);
|
225
|
+
throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
|
226
|
+
columnNumber, column), e);
|
163
227
|
}
|
164
228
|
if (index < 0) {
|
165
|
-
throw new RuntimeException(MessageFormat.format("illegal
|
166
|
-
column));
|
229
|
+
throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
|
230
|
+
columnNumber, column));
|
167
231
|
}
|
168
232
|
return index;
|
169
233
|
}
|
170
234
|
|
235
|
+
private static final String OPTION_NAME_CELL_COLUMN = ColumnOptionTask.CELL_COLUMN;
|
236
|
+
private static final String OPTION_NAME_CELL_ROW = ColumnOptionTask.CELL_ROW;
|
237
|
+
|
171
238
|
protected void logColumn(Column column, PoiExcelColumnBean bean, PoiExcelColumnValueType valueType, int index) {
|
172
239
|
PoiExcelCellAddress cellAddress = bean.getCellAddress();
|
173
240
|
|
@@ -176,8 +243,20 @@ public class PoiExcelColumnIndex {
|
|
176
243
|
cname = "cell_address";
|
177
244
|
cvalue = cellAddress.getString();
|
178
245
|
} else {
|
179
|
-
|
180
|
-
|
246
|
+
switch (recordType) {
|
247
|
+
default:
|
248
|
+
cname = OPTION_NAME_CELL_COLUMN;
|
249
|
+
cvalue = CellReference.convertNumToColString(index);
|
250
|
+
break;
|
251
|
+
case COLUMN:
|
252
|
+
cname = OPTION_NAME_CELL_ROW;
|
253
|
+
cvalue = Integer.toString(index + 1);
|
254
|
+
break;
|
255
|
+
case SHEET:
|
256
|
+
cname = "sheet";
|
257
|
+
cvalue = null;
|
258
|
+
break;
|
259
|
+
}
|
181
260
|
}
|
182
261
|
|
183
262
|
switch (valueType) {
|
@@ -186,43 +265,50 @@ public class PoiExcelColumnIndex {
|
|
186
265
|
case CELL_FORMULA:
|
187
266
|
case CELL_TYPE:
|
188
267
|
case CELL_CACHED_TYPE:
|
189
|
-
|
190
|
-
log.info("column.name={} <- {}={}, value_type={}", column.getName(), cname, cvalue, valueType);
|
268
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
191
269
|
break;
|
192
270
|
case CELL_STYLE:
|
193
271
|
case CELL_FONT:
|
194
272
|
case CELL_COMMENT:
|
195
273
|
String suffix = bean.getValueTypeSuffix();
|
196
274
|
if (suffix != null) {
|
197
|
-
log.info("column.name={} <- {}={},
|
198
|
-
valueType, suffix);
|
275
|
+
log.info("column.name={} <- {}={}, value={}[{}]", column.getName(), cname, cvalue, valueType, suffix);
|
199
276
|
} else {
|
200
|
-
log.info("column.name={} <- {}={},
|
201
|
-
valueType, suffix);
|
277
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
202
278
|
}
|
203
279
|
break;
|
204
280
|
|
281
|
+
case FILE_NAME:
|
282
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
283
|
+
break;
|
205
284
|
case SHEET_NAME:
|
206
285
|
if (cellAddress != null && cellAddress.getSheetName() != null) {
|
207
|
-
log.info("column.name={} <- {}={},
|
286
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
208
287
|
} else {
|
209
|
-
log.info("column.name={} <-
|
288
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
210
289
|
}
|
211
290
|
break;
|
212
291
|
case ROW_NUMBER:
|
213
|
-
if (cellAddress != null) {
|
214
|
-
log.info("column.name={} <- {}={},
|
292
|
+
if (cellAddress != null || cname.equals(OPTION_NAME_CELL_ROW)) {
|
293
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
294
|
+
} else {
|
295
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
296
|
+
}
|
297
|
+
break;
|
298
|
+
case COLUMN_NUMBER:
|
299
|
+
if (cellAddress != null || cname.equals(OPTION_NAME_CELL_COLUMN)) {
|
300
|
+
log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
|
215
301
|
} else {
|
216
|
-
log.info("column.name={} <-
|
302
|
+
log.info("column.name={} <- value={}", column.getName(), valueType);
|
217
303
|
}
|
218
304
|
break;
|
219
305
|
|
220
306
|
case CONSTANT:
|
221
307
|
String value = bean.getValueTypeSuffix();
|
222
308
|
if (value != null) {
|
223
|
-
log.info("column.name={} <-
|
309
|
+
log.info("column.name={} <- value={}[{}]", column.getName(), valueType, value);
|
224
310
|
} else {
|
225
|
-
log.info("column.name={} <-
|
311
|
+
log.info("column.name={} <- value={}({})", column.getName(), valueType, value);
|
226
312
|
}
|
227
313
|
break;
|
228
314
|
}
|
@@ -11,6 +11,7 @@ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
|
|
11
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
|
12
12
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask;
|
13
13
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask;
|
14
|
+
import org.embulk.parser.poi_excel.bean.record.RecordType;
|
14
15
|
import org.embulk.spi.Column;
|
15
16
|
import org.embulk.spi.ColumnConfig;
|
16
17
|
import org.embulk.spi.Schema;
|
@@ -75,13 +76,24 @@ public class PoiExcelSheetBean {
|
|
75
76
|
columnBeanList.add(bean);
|
76
77
|
}
|
77
78
|
|
78
|
-
new PoiExcelColumnIndex().initializeColumnIndex(task, columnBeanList);
|
79
|
+
new PoiExcelColumnIndex(this).initializeColumnIndex(task, columnBeanList);
|
79
80
|
}
|
80
81
|
|
81
82
|
public final List<SheetCommonOptionTask> getSheetOption() {
|
82
83
|
return sheetTaskList;
|
83
84
|
}
|
84
85
|
|
86
|
+
public RecordType getRecordType() {
|
87
|
+
List<SheetCommonOptionTask> list = getSheetOption();
|
88
|
+
for (SheetCommonOptionTask sheetTask : list) {
|
89
|
+
Optional<String> value = sheetTask.getRecordType();
|
90
|
+
if (value.isPresent()) {
|
91
|
+
return RecordType.of(value.get());
|
92
|
+
}
|
93
|
+
}
|
94
|
+
return RecordType.ROW;
|
95
|
+
}
|
96
|
+
|
85
97
|
public int getSkipHeaderLines() {
|
86
98
|
List<SheetCommonOptionTask> list = getSheetOption();
|
87
99
|
for (SheetCommonOptionTask sheetTask : list) {
|
@@ -0,0 +1,52 @@
|
|
1
|
+
package org.embulk.parser.poi_excel.bean.record;
|
2
|
+
|
3
|
+
import org.apache.poi.ss.usermodel.Cell;
|
4
|
+
import org.apache.poi.ss.usermodel.Sheet;
|
5
|
+
import org.apache.poi.ss.util.CellReference;
|
6
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
|
7
|
+
|
8
|
+
public abstract class PoiExcelRecord {
|
9
|
+
|
10
|
+
// loop record
|
11
|
+
|
12
|
+
private Sheet sheet;
|
13
|
+
|
14
|
+
public final void initialize(Sheet sheet, int skipHeaderLines) {
|
15
|
+
this.sheet = sheet;
|
16
|
+
initializeLoop(skipHeaderLines);
|
17
|
+
}
|
18
|
+
|
19
|
+
protected abstract void initializeLoop(int skipHeaderLines);
|
20
|
+
|
21
|
+
public final Sheet getSheet() {
|
22
|
+
return sheet;
|
23
|
+
}
|
24
|
+
|
25
|
+
public abstract boolean exists();
|
26
|
+
|
27
|
+
public abstract void moveNext();
|
28
|
+
|
29
|
+
// current record
|
30
|
+
|
31
|
+
public final void logStart() {
|
32
|
+
logStartEnd("start");
|
33
|
+
}
|
34
|
+
|
35
|
+
public final void logEnd() {
|
36
|
+
logStartEnd("end");
|
37
|
+
}
|
38
|
+
|
39
|
+
protected abstract void logStartEnd(String part);
|
40
|
+
|
41
|
+
public abstract int getRowIndex(PoiExcelColumnBean bean);
|
42
|
+
|
43
|
+
public abstract int getColumnIndex(PoiExcelColumnBean bean);
|
44
|
+
|
45
|
+
public abstract Cell getCell(PoiExcelColumnBean bean);
|
46
|
+
|
47
|
+
public CellReference getCellReference(PoiExcelColumnBean bean) {
|
48
|
+
int rowIndex = getRowIndex(bean);
|
49
|
+
int columnIndex = getColumnIndex(bean);
|
50
|
+
return new CellReference(rowIndex, columnIndex);
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
package org.embulk.parser.poi_excel.bean.record;
|
2
|
+
|
3
|
+
import org.apache.poi.ss.usermodel.Cell;
|
4
|
+
import org.apache.poi.ss.usermodel.Row;
|
5
|
+
import org.apache.poi.ss.usermodel.Sheet;
|
6
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
|
7
|
+
import org.embulk.spi.Exec;
|
8
|
+
import org.slf4j.Logger;
|
9
|
+
|
10
|
+
public class PoiExcelRecordColumn extends PoiExcelRecord {
|
11
|
+
private final Logger log = Exec.getLogger(getClass());
|
12
|
+
|
13
|
+
private int maxColumnIndex;
|
14
|
+
private int currentColumnIndex;
|
15
|
+
|
16
|
+
@Override
|
17
|
+
protected void initializeLoop(int skipHeaderLines) {
|
18
|
+
int minColumnIndex = Integer.MAX_VALUE;
|
19
|
+
maxColumnIndex = 0;
|
20
|
+
Sheet sheet = getSheet();
|
21
|
+
for (Row row : sheet) {
|
22
|
+
int firstIndex = row.getFirstCellNum();
|
23
|
+
if (firstIndex >= 0) {
|
24
|
+
minColumnIndex = Math.min(minColumnIndex, firstIndex);
|
25
|
+
}
|
26
|
+
maxColumnIndex = Math.max(maxColumnIndex, row.getLastCellNum());
|
27
|
+
}
|
28
|
+
|
29
|
+
this.currentColumnIndex = maxColumnIndex;
|
30
|
+
for (int i = minColumnIndex; i < maxColumnIndex; i++) {
|
31
|
+
if (i < skipHeaderLines) {
|
32
|
+
if (log.isDebugEnabled()) {
|
33
|
+
log.debug("column({}) skipped", i);
|
34
|
+
}
|
35
|
+
continue;
|
36
|
+
}
|
37
|
+
|
38
|
+
this.currentColumnIndex = i;
|
39
|
+
break;
|
40
|
+
}
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public boolean exists() {
|
45
|
+
return currentColumnIndex < maxColumnIndex;
|
46
|
+
}
|
47
|
+
|
48
|
+
@Override
|
49
|
+
public void moveNext() {
|
50
|
+
currentColumnIndex++;
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
protected void logStartEnd(String part) {
|
55
|
+
if (log.isDebugEnabled()) {
|
56
|
+
log.debug("column({}) {}", currentColumnIndex, part);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
@Override
|
61
|
+
public int getRowIndex(PoiExcelColumnBean bean) {
|
62
|
+
return bean.getColumnIndex();
|
63
|
+
}
|
64
|
+
|
65
|
+
@Override
|
66
|
+
public int getColumnIndex(PoiExcelColumnBean bean) {
|
67
|
+
return currentColumnIndex;
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public Cell getCell(PoiExcelColumnBean bean) {
|
72
|
+
int rowIndex = getRowIndex(bean);
|
73
|
+
Row row = getSheet().getRow(rowIndex);
|
74
|
+
if (row == null) {
|
75
|
+
return null;
|
76
|
+
}
|
77
|
+
int columnIndex = getColumnIndex(bean);
|
78
|
+
return row.getCell(columnIndex);
|
79
|
+
}
|
80
|
+
}
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package org.embulk.parser.poi_excel.bean.record;
|
2
|
+
|
3
|
+
import java.util.Iterator;
|
4
|
+
|
5
|
+
import org.apache.poi.ss.usermodel.Cell;
|
6
|
+
import org.apache.poi.ss.usermodel.Row;
|
7
|
+
import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
|
11
|
+
public class PoiExcelRecordRow extends PoiExcelRecord {
|
12
|
+
private final Logger log = Exec.getLogger(getClass());
|
13
|
+
|
14
|
+
private Iterator<Row> rowIterator;
|
15
|
+
private Row currentRow;
|
16
|
+
|
17
|
+
@Override
|
18
|
+
protected void initializeLoop(int skipHeaderLines) {
|
19
|
+
this.rowIterator = getSheet().iterator();
|
20
|
+
this.currentRow = null;
|
21
|
+
while (rowIterator.hasNext()) {
|
22
|
+
Row row = rowIterator.next();
|
23
|
+
|
24
|
+
int rowIndex = row.getRowNum();
|
25
|
+
if (rowIndex < skipHeaderLines) {
|
26
|
+
if (log.isDebugEnabled()) {
|
27
|
+
log.debug("row({}) skipped", rowIndex);
|
28
|
+
}
|
29
|
+
continue;
|
30
|
+
}
|
31
|
+
|
32
|
+
this.currentRow = row;
|
33
|
+
break;
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public boolean exists() {
|
39
|
+
return currentRow != null;
|
40
|
+
}
|
41
|
+
|
42
|
+
@Override
|
43
|
+
public void moveNext() {
|
44
|
+
if (rowIterator.hasNext()) {
|
45
|
+
this.currentRow = rowIterator.next();
|
46
|
+
} else {
|
47
|
+
this.currentRow = null;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
protected void logStartEnd(String part) {
|
53
|
+
assert currentRow != null;
|
54
|
+
if (log.isDebugEnabled()) {
|
55
|
+
log.debug("row({}) {}", currentRow.getRowNum(), part);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public int getRowIndex(PoiExcelColumnBean bean) {
|
61
|
+
assert currentRow != null;
|
62
|
+
return currentRow.getRowNum();
|
63
|
+
}
|
64
|
+
|
65
|
+
@Override
|
66
|
+
public int getColumnIndex(PoiExcelColumnBean bean) {
|
67
|
+
return bean.getColumnIndex();
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public Cell getCell(PoiExcelColumnBean bean) {
|
72
|
+
assert currentRow != null;
|
73
|
+
int columnIndex = getColumnIndex(bean);
|
74
|
+
return currentRow.getCell(columnIndex);
|
75
|
+
}
|
76
|
+
}
|