embulk-parser-poi_excel 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +21 -21
  3. data/README.md +247 -233
  4. data/build.gradle +92 -86
  5. data/classpath/{embulk-parser-poi_excel-0.1.11.jar → embulk-parser-poi_excel-0.1.13.jar} +0 -0
  6. data/gradlew +172 -172
  7. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +23 -3
  8. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +38 -23
  9. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +23 -1
  10. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +114 -28
  11. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java +13 -1
  12. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecord.java +52 -0
  13. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordColumn.java +80 -0
  14. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordRow.java +76 -0
  15. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java +55 -0
  16. data/src/main/java/org/embulk/parser/poi_excel/bean/record/RecordType.java +114 -0
  17. data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +18 -9
  18. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +8 -1
  19. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +26 -26
  20. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorValue.java +28 -1
  21. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +9 -0
  22. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_recordType.java +192 -0
  23. metadata +9 -3
@@ -10,7 +10,6 @@ import java.util.regex.Pattern;
10
10
 
11
11
  import org.apache.poi.EncryptedDocumentException;
12
12
  import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
13
- import org.apache.poi.ss.usermodel.Row;
14
13
  import org.apache.poi.ss.usermodel.Sheet;
15
14
  import org.apache.poi.ss.usermodel.Workbook;
16
15
  import org.apache.poi.ss.usermodel.WorkbookFactory;
@@ -20,6 +19,8 @@ import org.embulk.config.ConfigException;
20
19
  import org.embulk.config.ConfigSource;
21
20
  import org.embulk.config.Task;
22
21
  import org.embulk.config.TaskSource;
22
+ import org.embulk.parser.poi_excel.bean.PoiExcelSheetBean;
23
+ import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord;
23
24
  import org.embulk.parser.poi_excel.visitor.PoiExcelColumnVisitor;
24
25
  import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorFactory;
25
26
  import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue;
@@ -69,6 +70,10 @@ public class PoiExcelParserPlugin implements ParserPlugin {
69
70
 
70
71
  public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask {
71
72
 
73
+ @Config("record_type")
74
+ @ConfigDefault("null")
75
+ public Optional<String> getRecordType();
76
+
72
77
  @Config("skip_header_lines")
73
78
  @ConfigDefault("null")
74
79
  public Optional<Integer> getSkipHeaderLines();
@@ -91,11 +96,25 @@ public class PoiExcelParserPlugin implements ParserPlugin {
91
96
  @ConfigDefault("null")
92
97
  public Optional<String> getValueType();
93
98
 
94
- // A,B,... or number(1 origin)
99
+ // same as cell_column
95
100
  @Config("column_number")
96
101
  @ConfigDefault("null")
97
102
  public Optional<String> getColumnNumber();
98
103
 
104
+ public static final String CELL_COLUMN = "cell_column";
105
+
106
+ // A,B,... or number(1 origin)
107
+ @Config(CELL_COLUMN)
108
+ @ConfigDefault("null")
109
+ public Optional<String> getCellColumn();
110
+
111
+ public static final String CELL_ROW = "cell_row";
112
+
113
+ // number(1 origin)
114
+ @Config(CELL_ROW)
115
+ @ConfigDefault("null")
116
+ public Optional<String> getCellRow();
117
+
99
118
  // A1,B2,... or Sheet1!A1
100
119
  @Config("cell_address")
101
120
  @ConfigDefault("null")
@@ -186,7 +205,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
186
205
  if (log.isDebugEnabled()) {
187
206
  log.debug("resolved sheet names={}", list);
188
207
  }
189
- run(task, schema, workbook, list, output);
208
+ run(task, schema, input, workbook, list, output);
190
209
  }
191
210
  }
192
211
  }
@@ -238,7 +257,8 @@ public class PoiExcelParserPlugin implements ParserPlugin {
238
257
  return new ArrayList<>(set);
239
258
  }
240
259
 
241
- protected void run(PluginTask task, Schema schema, Workbook workbook, List<String> sheetNames, PageOutput output) {
260
+ protected void run(PluginTask task, Schema schema, FileInput input, Workbook workbook, List<String> sheetNames,
261
+ PageOutput output) {
242
262
  final int flushCount = task.getFlushCount();
243
263
 
244
264
  try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
@@ -254,23 +274,20 @@ public class PoiExcelParserPlugin implements ParserPlugin {
254
274
  }
255
275
 
256
276
  log.info("sheet={}", sheetName);
257
- PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, sheet, pageBuilder);
277
+ PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, input, sheet, pageBuilder);
258
278
  PoiExcelColumnVisitor visitor = factory.getPoiExcelColumnVisitor();
259
- final int skipHeaderLines = factory.getVisitorValue().getSheetBean().getSkipHeaderLines();
279
+ PoiExcelSheetBean sheetBean = factory.getVisitorValue().getSheetBean();
280
+ final int skipHeaderLines = sheetBean.getSkipHeaderLines();
281
+
282
+ PoiExcelRecord record = sheetBean.getRecordType().newPoiExcelRecord();
283
+ record.initialize(sheet, skipHeaderLines);
284
+ visitor.setRecord(record);
260
285
 
261
286
  int count = 0;
262
- for (Row row : sheet) {
263
- int rowIndex = row.getRowNum();
264
- if (rowIndex < skipHeaderLines) {
265
- log.debug("row({}) skipped", rowIndex);
266
- continue;
267
- }
268
- if (log.isDebugEnabled()) {
269
- log.debug("row({}) start", rowIndex);
270
- }
287
+ for (; record.exists(); record.moveNext()) {
288
+ record.logStart();
271
289
 
272
- visitor.setRow(row);
273
- schema.visitColumns(visitor);
290
+ schema.visitColumns(visitor); // use record
274
291
  pageBuilder.addRecord();
275
292
 
276
293
  if (++count >= flushCount) {
@@ -279,9 +296,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
279
296
  count = 0;
280
297
  }
281
298
 
282
- if (log.isDebugEnabled()) {
283
- log.debug("row({}) end", rowIndex);
284
- }
299
+ record.logEnd();
285
300
  }
286
301
  pageBuilder.flush();
287
302
  }
@@ -289,9 +304,9 @@ public class PoiExcelParserPlugin implements ParserPlugin {
289
304
  }
290
305
  }
291
306
 
292
- protected PoiExcelVisitorFactory newPoiExcelVisitorFactory(PluginTask task, Schema schema, Sheet sheet,
293
- PageBuilder pageBuilder) {
294
- PoiExcelVisitorValue visitorValue = new PoiExcelVisitorValue(task, schema, sheet, pageBuilder);
307
+ protected PoiExcelVisitorFactory newPoiExcelVisitorFactory(PluginTask task, Schema schema, FileInput input,
308
+ Sheet sheet, PageBuilder pageBuilder) {
309
+ PoiExcelVisitorValue visitorValue = new PoiExcelVisitorValue(task, schema, input, sheet, pageBuilder);
295
310
  return new PoiExcelVisitorFactory(visitorValue);
296
311
  }
297
312
  }
@@ -5,6 +5,7 @@ import java.util.ArrayList;
5
5
  import java.util.Collections;
6
6
  import java.util.List;
7
7
 
8
+ import org.apache.poi.ss.util.CellReference;
8
9
  import org.embulk.config.ConfigException;
9
10
  import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
10
11
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnCommonOptionTask;
@@ -101,6 +102,12 @@ public class PoiExcelColumnBean {
101
102
  }
102
103
 
103
104
  public Optional<String> getColumnNumber() {
105
+ for (ColumnOptionTask task : columnTaskList) {
106
+ Optional<String> option = task.getCellColumn();
107
+ if (option.isPresent()) {
108
+ return option;
109
+ }
110
+ }
104
111
  for (ColumnOptionTask task : columnTaskList) {
105
112
  Optional<String> option = task.getColumnNumber();
106
113
  if (option.isPresent()) {
@@ -110,6 +117,16 @@ public class PoiExcelColumnBean {
110
117
  return Optional.absent();
111
118
  }
112
119
 
120
+ public Optional<String> getRowNumber() {
121
+ for (ColumnOptionTask task : columnTaskList) {
122
+ Optional<String> option = task.getCellRow();
123
+ if (option.isPresent()) {
124
+ return option;
125
+ }
126
+ }
127
+ return Optional.absent();
128
+ }
129
+
113
130
  private Optional<PoiExcelCellAddress> cellAddress;
114
131
 
115
132
  public PoiExcelCellAddress getCellAddress() {
@@ -123,12 +140,17 @@ public class PoiExcelColumnBean {
123
140
  for (ColumnOptionTask task : columnTaskList) {
124
141
  Optional<String> option = task.getCellAddress();
125
142
  if (option.isPresent()) {
126
- return Optional.of(new PoiExcelCellAddress(option.get()));
143
+ CellReference ref = new CellReference(option.get());
144
+ return Optional.of(new PoiExcelCellAddress(ref));
127
145
  }
128
146
  }
129
147
  return Optional.absent();
130
148
  }
131
149
 
150
+ public void setCellAddress(CellReference ref) {
151
+ this.cellAddress = Optional.of(new PoiExcelCellAddress(ref));
152
+ }
153
+
132
154
  protected abstract class CacheValue<T> {
133
155
  private T value;
134
156
 
@@ -7,7 +7,9 @@ import java.util.Map;
7
7
 
8
8
  import org.apache.poi.ss.util.CellReference;
9
9
  import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
10
+ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
10
11
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
12
+ import org.embulk.parser.poi_excel.bean.record.RecordType;
11
13
  import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress;
12
14
  import org.embulk.spi.Column;
13
15
  import org.embulk.spi.Exec;
@@ -19,18 +21,26 @@ import com.google.common.base.Optional;
19
21
  public class PoiExcelColumnIndex {
20
22
  private final Logger log = Exec.getLogger(getClass());
21
23
 
24
+ protected final RecordType recordType;
22
25
  protected final Map<String, Integer> indexMap = new LinkedHashMap<>();
23
26
 
27
+ public PoiExcelColumnIndex(PoiExcelSheetBean sheetBean) {
28
+ this.recordType = sheetBean.getRecordType();
29
+ }
30
+
24
31
  public void initializeColumnIndex(PluginTask task, List<PoiExcelColumnBean> beanList) {
32
+ log.info("record_type={}", recordType);
33
+
25
34
  int index = -1;
26
35
  indexMap.clear();
27
36
 
28
37
  Schema schema = task.getColumns().toSchema();
29
38
  for (Column column : schema.getColumns()) {
30
39
  PoiExcelColumnBean bean = beanList.get(column.getIndex());
31
- PoiExcelColumnValueType valueType = bean.getValueType();
40
+ initializeCellAddress(column, bean);
32
41
 
33
- if (valueType.useCell()) {
42
+ PoiExcelColumnValueType valueType = bean.getValueType();
43
+ if (valueType.useCell(recordType)) {
34
44
  index = resolveColumnIndex(column, bean, index, valueType);
35
45
  if (index < 0) {
36
46
  index = 0;
@@ -39,21 +49,74 @@ public class PoiExcelColumnIndex {
39
49
  indexMap.put(column.getName(), index);
40
50
  }
41
51
 
52
+ initializeCellAddress2(column, bean, index);
53
+
42
54
  if (log.isInfoEnabled()) {
43
55
  logColumn(column, bean, valueType, index);
44
56
  }
45
57
  }
46
58
  }
47
59
 
60
+ protected void initializeCellAddress(Column column, PoiExcelColumnBean bean) {
61
+ if (bean.getCellAddress() != null) {
62
+ return;
63
+ }
64
+
65
+ Optional<String> rowOption = bean.getRowNumber();
66
+ Optional<String> colOption = bean.getColumnNumber();
67
+ if (rowOption.isPresent() && colOption.isPresent()) {
68
+ String rowNumber = rowOption.get();
69
+ String colNumber = colOption.get();
70
+ initializeCellAddress(column, bean, rowNumber, colNumber);
71
+ return;
72
+ }
73
+
74
+ if (recordType == RecordType.SHEET) {
75
+ String rowNumber = rowOption.or("1");
76
+ String colNumber = colOption.or("A");
77
+ initializeCellAddress(column, bean, rowNumber, colNumber);
78
+ return;
79
+ }
80
+ }
81
+
82
+ protected void initializeCellAddress(Column column, PoiExcelColumnBean bean, String rowNumber, String columnNumber) {
83
+ int columnIndex = convertColumnIndex(column, OPTION_NAME_CELL_COLUMN, columnNumber);
84
+ int rowIndex = convertColumnIndex(column, OPTION_NAME_CELL_ROW, rowNumber);
85
+ CellReference ref = new CellReference(rowIndex, columnIndex);
86
+ bean.setCellAddress(ref);
87
+ }
88
+
89
+ protected void initializeCellAddress2(Column column, PoiExcelColumnBean bean, int index) {
90
+ if (bean.getCellAddress() != null) {
91
+ return;
92
+ }
93
+
94
+ Optional<String> recordOption = recordType.getRecordOption(bean);
95
+ if (recordOption.isPresent()) {
96
+ int rowIndex, columnIndex;
97
+ switch (recordType) {
98
+ case ROW:
99
+ rowIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
100
+ columnIndex = (index >= 0) ? index : 0;
101
+ break;
102
+ case COLUMN:
103
+ rowIndex = (index >= 0) ? index : 0;
104
+ columnIndex = convertColumnIndex(column, recordType.getRecordOptionName(), recordOption.get());
105
+ break;
106
+ default:
107
+ throw new IllegalStateException();
108
+ }
109
+ CellReference ref = new CellReference(rowIndex, columnIndex);
110
+ bean.setCellAddress(ref);
111
+ }
112
+ }
113
+
48
114
  protected int resolveColumnIndex(Column column, PoiExcelColumnBean bean, int index,
49
115
  PoiExcelColumnValueType valueType) {
50
- Optional<String> numberOption = bean.getColumnNumber();
116
+ Optional<String> numberOption = recordType.getNumberOption(bean);
51
117
  PoiExcelCellAddress cellAddress = bean.getCellAddress();
52
118
 
53
119
  if (cellAddress != null) {
54
- if (numberOption.isPresent()) {
55
- throw new RuntimeException("only one of column_number, cell_address can be specified");
56
- }
57
120
  return index;
58
121
  }
59
122
 
@@ -73,7 +136,7 @@ public class PoiExcelColumnIndex {
73
136
  break;
74
137
  }
75
138
  }
76
- return convertColumnIndex(column, columnNumber);
139
+ return convertColumnIndex(column, recordType.getNumberOptionName(), columnNumber);
77
140
  } else {
78
141
  if (valueType.nextIndex()) {
79
142
  index++;
@@ -144,11 +207,12 @@ public class PoiExcelColumnIndex {
144
207
 
145
208
  protected void checkIndex(Column column, int index) {
146
209
  if (index < 0) {
147
- throw new RuntimeException(MessageFormat.format("column_number out of range at {0}", column));
210
+ throw new RuntimeException(MessageFormat.format("{0} out of range at {1}",
211
+ recordType.getNumberOptionName(), column));
148
212
  }
149
213
  }
150
214
 
151
- protected int convertColumnIndex(Column column, String columnNumber) {
215
+ protected int convertColumnIndex(Column column, String numberOptionName, String columnNumber) {
152
216
  int index;
153
217
  try {
154
218
  char c = columnNumber.charAt(0);
@@ -158,16 +222,19 @@ public class PoiExcelColumnIndex {
158
222
  index = CellReference.convertColStringToIndex(columnNumber);
159
223
  }
160
224
  } catch (Exception e) {
161
- throw new RuntimeException(MessageFormat.format("illegal column_number=\"{0}\" at {1}", columnNumber,
162
- column), e);
225
+ throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
226
+ columnNumber, column), e);
163
227
  }
164
228
  if (index < 0) {
165
- throw new RuntimeException(MessageFormat.format("illegal column_number=\"{0}\" at {1}", columnNumber,
166
- column));
229
+ throw new RuntimeException(MessageFormat.format("illegal {0}=\"{1}\" at {2}", numberOptionName,
230
+ columnNumber, column));
167
231
  }
168
232
  return index;
169
233
  }
170
234
 
235
+ private static final String OPTION_NAME_CELL_COLUMN = ColumnOptionTask.CELL_COLUMN;
236
+ private static final String OPTION_NAME_CELL_ROW = ColumnOptionTask.CELL_ROW;
237
+
171
238
  protected void logColumn(Column column, PoiExcelColumnBean bean, PoiExcelColumnValueType valueType, int index) {
172
239
  PoiExcelCellAddress cellAddress = bean.getCellAddress();
173
240
 
@@ -176,8 +243,20 @@ public class PoiExcelColumnIndex {
176
243
  cname = "cell_address";
177
244
  cvalue = cellAddress.getString();
178
245
  } else {
179
- cname = "cell_column";
180
- cvalue = CellReference.convertNumToColString(index);
246
+ switch (recordType) {
247
+ default:
248
+ cname = OPTION_NAME_CELL_COLUMN;
249
+ cvalue = CellReference.convertNumToColString(index);
250
+ break;
251
+ case COLUMN:
252
+ cname = OPTION_NAME_CELL_ROW;
253
+ cvalue = Integer.toString(index + 1);
254
+ break;
255
+ case SHEET:
256
+ cname = "sheet";
257
+ cvalue = null;
258
+ break;
259
+ }
181
260
  }
182
261
 
183
262
  switch (valueType) {
@@ -186,43 +265,50 @@ public class PoiExcelColumnIndex {
186
265
  case CELL_FORMULA:
187
266
  case CELL_TYPE:
188
267
  case CELL_CACHED_TYPE:
189
- case COLUMN_NUMBER:
190
- log.info("column.name={} <- {}={}, value_type={}", column.getName(), cname, cvalue, valueType);
268
+ log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
191
269
  break;
192
270
  case CELL_STYLE:
193
271
  case CELL_FONT:
194
272
  case CELL_COMMENT:
195
273
  String suffix = bean.getValueTypeSuffix();
196
274
  if (suffix != null) {
197
- log.info("column.name={} <- {}={}, value_type={}, value=[{}]", column.getName(), cname, cvalue,
198
- valueType, suffix);
275
+ log.info("column.name={} <- {}={}, value={}[{}]", column.getName(), cname, cvalue, valueType, suffix);
199
276
  } else {
200
- log.info("column.name={} <- {}={}, value_type={}, value={}", column.getName(), cname, cvalue,
201
- valueType, suffix);
277
+ log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
202
278
  }
203
279
  break;
204
280
 
281
+ case FILE_NAME:
282
+ log.info("column.name={} <- value={}", column.getName(), valueType);
283
+ break;
205
284
  case SHEET_NAME:
206
285
  if (cellAddress != null && cellAddress.getSheetName() != null) {
207
- log.info("column.name={} <- {}={}, value_type={}", column.getName(), cname, cvalue, valueType);
286
+ log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
208
287
  } else {
209
- log.info("column.name={} <- value_type={}", column.getName(), valueType);
288
+ log.info("column.name={} <- value={}", column.getName(), valueType);
210
289
  }
211
290
  break;
212
291
  case ROW_NUMBER:
213
- if (cellAddress != null) {
214
- log.info("column.name={} <- {}={}, value_type={}", column.getName(), cname, cvalue, valueType);
292
+ if (cellAddress != null || cname.equals(OPTION_NAME_CELL_ROW)) {
293
+ log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
294
+ } else {
295
+ log.info("column.name={} <- value={}", column.getName(), valueType);
296
+ }
297
+ break;
298
+ case COLUMN_NUMBER:
299
+ if (cellAddress != null || cname.equals(OPTION_NAME_CELL_COLUMN)) {
300
+ log.info("column.name={} <- {}={}, value={}", column.getName(), cname, cvalue, valueType);
215
301
  } else {
216
- log.info("column.name={} <- value_type={}", column.getName(), valueType);
302
+ log.info("column.name={} <- value={}", column.getName(), valueType);
217
303
  }
218
304
  break;
219
305
 
220
306
  case CONSTANT:
221
307
  String value = bean.getValueTypeSuffix();
222
308
  if (value != null) {
223
- log.info("column.name={} <- value_type={}, value=[{}]", column.getName(), valueType, value);
309
+ log.info("column.name={} <- value={}[{}]", column.getName(), valueType, value);
224
310
  } else {
225
- log.info("column.name={} <- value_type={}, value={}", column.getName(), valueType, value);
311
+ log.info("column.name={} <- value={}({})", column.getName(), valueType, value);
226
312
  }
227
313
  break;
228
314
  }
@@ -11,6 +11,7 @@ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
11
11
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
12
12
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask;
13
13
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask;
14
+ import org.embulk.parser.poi_excel.bean.record.RecordType;
14
15
  import org.embulk.spi.Column;
15
16
  import org.embulk.spi.ColumnConfig;
16
17
  import org.embulk.spi.Schema;
@@ -75,13 +76,24 @@ public class PoiExcelSheetBean {
75
76
  columnBeanList.add(bean);
76
77
  }
77
78
 
78
- new PoiExcelColumnIndex().initializeColumnIndex(task, columnBeanList);
79
+ new PoiExcelColumnIndex(this).initializeColumnIndex(task, columnBeanList);
79
80
  }
80
81
 
81
82
  public final List<SheetCommonOptionTask> getSheetOption() {
82
83
  return sheetTaskList;
83
84
  }
84
85
 
86
+ public RecordType getRecordType() {
87
+ List<SheetCommonOptionTask> list = getSheetOption();
88
+ for (SheetCommonOptionTask sheetTask : list) {
89
+ Optional<String> value = sheetTask.getRecordType();
90
+ if (value.isPresent()) {
91
+ return RecordType.of(value.get());
92
+ }
93
+ }
94
+ return RecordType.ROW;
95
+ }
96
+
85
97
  public int getSkipHeaderLines() {
86
98
  List<SheetCommonOptionTask> list = getSheetOption();
87
99
  for (SheetCommonOptionTask sheetTask : list) {
@@ -0,0 +1,52 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.apache.poi.ss.usermodel.Sheet;
5
+ import org.apache.poi.ss.util.CellReference;
6
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
7
+
8
+ public abstract class PoiExcelRecord {
9
+
10
+ // loop record
11
+
12
+ private Sheet sheet;
13
+
14
+ public final void initialize(Sheet sheet, int skipHeaderLines) {
15
+ this.sheet = sheet;
16
+ initializeLoop(skipHeaderLines);
17
+ }
18
+
19
+ protected abstract void initializeLoop(int skipHeaderLines);
20
+
21
+ public final Sheet getSheet() {
22
+ return sheet;
23
+ }
24
+
25
+ public abstract boolean exists();
26
+
27
+ public abstract void moveNext();
28
+
29
+ // current record
30
+
31
+ public final void logStart() {
32
+ logStartEnd("start");
33
+ }
34
+
35
+ public final void logEnd() {
36
+ logStartEnd("end");
37
+ }
38
+
39
+ protected abstract void logStartEnd(String part);
40
+
41
+ public abstract int getRowIndex(PoiExcelColumnBean bean);
42
+
43
+ public abstract int getColumnIndex(PoiExcelColumnBean bean);
44
+
45
+ public abstract Cell getCell(PoiExcelColumnBean bean);
46
+
47
+ public CellReference getCellReference(PoiExcelColumnBean bean) {
48
+ int rowIndex = getRowIndex(bean);
49
+ int columnIndex = getColumnIndex(bean);
50
+ return new CellReference(rowIndex, columnIndex);
51
+ }
52
+ }
@@ -0,0 +1,80 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.apache.poi.ss.usermodel.Row;
5
+ import org.apache.poi.ss.usermodel.Sheet;
6
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
7
+ import org.embulk.spi.Exec;
8
+ import org.slf4j.Logger;
9
+
10
+ public class PoiExcelRecordColumn extends PoiExcelRecord {
11
+ private final Logger log = Exec.getLogger(getClass());
12
+
13
+ private int maxColumnIndex;
14
+ private int currentColumnIndex;
15
+
16
+ @Override
17
+ protected void initializeLoop(int skipHeaderLines) {
18
+ int minColumnIndex = Integer.MAX_VALUE;
19
+ maxColumnIndex = 0;
20
+ Sheet sheet = getSheet();
21
+ for (Row row : sheet) {
22
+ int firstIndex = row.getFirstCellNum();
23
+ if (firstIndex >= 0) {
24
+ minColumnIndex = Math.min(minColumnIndex, firstIndex);
25
+ }
26
+ maxColumnIndex = Math.max(maxColumnIndex, row.getLastCellNum());
27
+ }
28
+
29
+ this.currentColumnIndex = maxColumnIndex;
30
+ for (int i = minColumnIndex; i < maxColumnIndex; i++) {
31
+ if (i < skipHeaderLines) {
32
+ if (log.isDebugEnabled()) {
33
+ log.debug("column({}) skipped", i);
34
+ }
35
+ continue;
36
+ }
37
+
38
+ this.currentColumnIndex = i;
39
+ break;
40
+ }
41
+ }
42
+
43
+ @Override
44
+ public boolean exists() {
45
+ return currentColumnIndex < maxColumnIndex;
46
+ }
47
+
48
+ @Override
49
+ public void moveNext() {
50
+ currentColumnIndex++;
51
+ }
52
+
53
+ @Override
54
+ protected void logStartEnd(String part) {
55
+ if (log.isDebugEnabled()) {
56
+ log.debug("column({}) {}", currentColumnIndex, part);
57
+ }
58
+ }
59
+
60
+ @Override
61
+ public int getRowIndex(PoiExcelColumnBean bean) {
62
+ return bean.getColumnIndex();
63
+ }
64
+
65
+ @Override
66
+ public int getColumnIndex(PoiExcelColumnBean bean) {
67
+ return currentColumnIndex;
68
+ }
69
+
70
+ @Override
71
+ public Cell getCell(PoiExcelColumnBean bean) {
72
+ int rowIndex = getRowIndex(bean);
73
+ Row row = getSheet().getRow(rowIndex);
74
+ if (row == null) {
75
+ return null;
76
+ }
77
+ int columnIndex = getColumnIndex(bean);
78
+ return row.getCell(columnIndex);
79
+ }
80
+ }
@@ -0,0 +1,76 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import java.util.Iterator;
4
+
5
+ import org.apache.poi.ss.usermodel.Cell;
6
+ import org.apache.poi.ss.usermodel.Row;
7
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
8
+ import org.embulk.spi.Exec;
9
+ import org.slf4j.Logger;
10
+
11
+ public class PoiExcelRecordRow extends PoiExcelRecord {
12
+ private final Logger log = Exec.getLogger(getClass());
13
+
14
+ private Iterator<Row> rowIterator;
15
+ private Row currentRow;
16
+
17
+ @Override
18
+ protected void initializeLoop(int skipHeaderLines) {
19
+ this.rowIterator = getSheet().iterator();
20
+ this.currentRow = null;
21
+ while (rowIterator.hasNext()) {
22
+ Row row = rowIterator.next();
23
+
24
+ int rowIndex = row.getRowNum();
25
+ if (rowIndex < skipHeaderLines) {
26
+ if (log.isDebugEnabled()) {
27
+ log.debug("row({}) skipped", rowIndex);
28
+ }
29
+ continue;
30
+ }
31
+
32
+ this.currentRow = row;
33
+ break;
34
+ }
35
+ }
36
+
37
+ @Override
38
+ public boolean exists() {
39
+ return currentRow != null;
40
+ }
41
+
42
+ @Override
43
+ public void moveNext() {
44
+ if (rowIterator.hasNext()) {
45
+ this.currentRow = rowIterator.next();
46
+ } else {
47
+ this.currentRow = null;
48
+ }
49
+ }
50
+
51
+ @Override
52
+ protected void logStartEnd(String part) {
53
+ assert currentRow != null;
54
+ if (log.isDebugEnabled()) {
55
+ log.debug("row({}) {}", currentRow.getRowNum(), part);
56
+ }
57
+ }
58
+
59
+ @Override
60
+ public int getRowIndex(PoiExcelColumnBean bean) {
61
+ assert currentRow != null;
62
+ return currentRow.getRowNum();
63
+ }
64
+
65
+ @Override
66
+ public int getColumnIndex(PoiExcelColumnBean bean) {
67
+ return bean.getColumnIndex();
68
+ }
69
+
70
+ @Override
71
+ public Cell getCell(PoiExcelColumnBean bean) {
72
+ assert currentRow != null;
73
+ int columnIndex = getColumnIndex(bean);
74
+ return currentRow.getCell(columnIndex);
75
+ }
76
+ }