embulk-parser-poi_excel 0.1.7 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -6
  3. data/build.gradle +16 -8
  4. data/classpath/{commons-codec-1.9.jar → commons-codec-1.10.jar} +0 -0
  5. data/classpath/commons-collections4-4.1.jar +0 -0
  6. data/classpath/curvesapi-1.04.jar +0 -0
  7. data/classpath/embulk-parser-poi_excel-0.1.10.jar +0 -0
  8. data/classpath/poi-3.17.jar +0 -0
  9. data/classpath/poi-ooxml-3.17.jar +0 -0
  10. data/classpath/{poi-ooxml-schemas-3.13.jar → poi-ooxml-schemas-3.17.jar} +0 -0
  11. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +6 -1
  12. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +51 -6
  13. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +74 -39
  14. data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +50 -0
  15. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +0 -6
  16. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +11 -11
  17. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +24 -10
  18. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +74 -44
  19. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java +1 -1
  20. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +45 -9
  21. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +5 -0
  22. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +3 -0
  23. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +5 -0
  24. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +5 -0
  25. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +5 -0
  26. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +5 -0
  27. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java +51 -0
  28. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +1 -51
  29. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java +69 -0
  30. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +1 -1
  31. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java +1 -1
  32. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +1 -1
  33. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +14 -14
  34. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +15 -14
  35. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +1 -1
  36. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java +1 -1
  37. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java +1 -1
  38. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java +1 -1
  39. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java +89 -0
  40. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +1 -1
  41. metadata +23 -17
  42. data/classpath/embulk-parser-poi_excel-0.1.7.jar +0 -0
  43. data/classpath/poi-3.13.jar +0 -0
  44. data/classpath/poi-ooxml-3.13.jar +0 -0
@@ -4,6 +4,7 @@ import java.text.MessageFormat;
4
4
  import java.util.List;
5
5
 
6
6
  import org.apache.poi.ss.usermodel.Cell;
7
+ import org.apache.poi.ss.usermodel.CellType;
7
8
  import org.apache.poi.ss.usermodel.CellValue;
8
9
  import org.apache.poi.ss.usermodel.CreationHelper;
9
10
  import org.apache.poi.ss.usermodel.FormulaError;
@@ -17,7 +18,9 @@ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.FormulaReplaceTask;
17
18
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
18
19
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.ErrorStrategy;
19
20
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.FormulaHandling;
21
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.SearchMergedCell;
20
22
  import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor;
23
+ import org.embulk.parser.poi_excel.visitor.util.MergedRegionMap;
21
24
  import org.embulk.spi.Column;
22
25
  import org.embulk.spi.Exec;
23
26
  import org.embulk.spi.PageBuilder;
@@ -39,15 +42,15 @@ public class PoiExcelCellValueVisitor {
39
42
 
40
43
  Column column = bean.getColumn();
41
44
 
42
- int cellType = cell.getCellType();
45
+ CellType cellType = cell.getCellTypeEnum();
43
46
  switch (cellType) {
44
- case Cell.CELL_TYPE_NUMERIC:
47
+ case NUMERIC:
45
48
  visitor.visitCellValueNumeric(column, cell, cell.getNumericCellValue());
46
49
  return;
47
- case Cell.CELL_TYPE_STRING:
50
+ case STRING:
48
51
  visitor.visitCellValueString(column, cell, cell.getStringCellValue());
49
52
  return;
50
- case Cell.CELL_TYPE_FORMULA:
53
+ case FORMULA:
51
54
  PoiExcelColumnValueType valueType = bean.getValueType();
52
55
  if (valueType == PoiExcelColumnValueType.CELL_FORMULA) {
53
56
  visitor.visitCellFormula(column, cell);
@@ -55,13 +58,13 @@ public class PoiExcelCellValueVisitor {
55
58
  visitCellValueFormula(bean, cell, visitor);
56
59
  }
57
60
  return;
58
- case Cell.CELL_TYPE_BLANK:
61
+ case BLANK:
59
62
  visitCellValueBlank(bean, cell, visitor);
60
63
  return;
61
- case Cell.CELL_TYPE_BOOLEAN:
64
+ case BOOLEAN:
62
65
  visitor.visitCellValueBoolean(column, cell, cell.getBooleanCellValue());
63
66
  return;
64
- case Cell.CELL_TYPE_ERROR:
67
+ case ERROR:
65
68
  visitCellValueError(bean, cell, cell.getErrorCellValue(), visitor);
66
69
  return;
67
70
  default:
@@ -70,45 +73,72 @@ public class PoiExcelCellValueVisitor {
70
73
  }
71
74
 
72
75
  protected void visitCellValueBlank(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) {
73
- assert cell.getCellType() == Cell.CELL_TYPE_BLANK;
76
+ assert cell.getCellTypeEnum() == CellType.BLANK;
74
77
 
75
78
  Column column = bean.getColumn();
76
79
 
77
- boolean search = bean.getSearchMergedCell();
78
- if (!search) {
79
- visitor.visitCellValueBlank(column, cell);
80
- return;
80
+ CellRangeAddress region = findRegion(bean, cell);
81
+ if (region != null) {
82
+ Row firstRow = cell.getSheet().getRow(region.getFirstRow());
83
+ if (firstRow == null) {
84
+ visitCellNull(column);
85
+ return;
86
+ }
87
+ Cell firstCell = firstRow.getCell(region.getFirstColumn());
88
+ if (firstCell == null) {
89
+ visitCellNull(column);
90
+ return;
91
+ }
92
+
93
+ if (firstCell.getRowIndex() != cell.getRowIndex() || firstCell.getColumnIndex() != cell.getColumnIndex()) {
94
+ visitCellValue(bean, firstCell, visitor);
95
+ return;
96
+ }
97
+ }
98
+
99
+ visitor.visitCellValueBlank(column, cell);
100
+ }
101
+
102
+ protected CellRangeAddress findRegion(PoiExcelColumnBean bean, Cell cell) {
103
+ SearchMergedCell search = bean.getSearchMergedCell();
104
+ switch (search) {
105
+ case NONE:
106
+ return null;
107
+ case LINEAR_SEARCH:
108
+ return findRegionLinearSearch(bean, cell);
109
+ default:
110
+ return findRegionTreeSearch(bean, cell);
81
111
  }
112
+ }
82
113
 
114
+ protected CellRangeAddress findRegionLinearSearch(PoiExcelColumnBean bean, Cell cell) {
115
+ Sheet sheet = cell.getSheet();
83
116
  int r = cell.getRowIndex();
84
117
  int c = cell.getColumnIndex();
85
118
 
86
- Sheet sheet = cell.getSheet();
87
119
  int size = sheet.getNumMergedRegions();
88
120
  for (int i = 0; i < size; i++) {
89
- CellRangeAddress range = visitorValue.getSheet().getMergedRegion(i);
90
- if (range.isInRange(r, c)) {
91
- Row firstRow = sheet.getRow(range.getFirstRow());
92
- if (firstRow == null) {
93
- visitCellNull(column);
94
- return;
95
- }
96
- Cell firstCell = firstRow.getCell(range.getFirstColumn());
97
- if (firstCell == null) {
98
- visitCellNull(column);
99
- return;
100
- }
101
-
102
- visitCellValue(bean, firstCell, visitor);
103
- return;
121
+ CellRangeAddress region = sheet.getMergedRegion(i);
122
+ if (region.isInRange(r, c)) {
123
+ return region;
104
124
  }
105
125
  }
106
126
 
107
- visitor.visitCellValueBlank(column, cell);
127
+ return null;
128
+ }
129
+
130
+ private final MergedRegionMap mergedRegionMap = new MergedRegionMap();
131
+
132
+ protected CellRangeAddress findRegionTreeSearch(PoiExcelColumnBean bean, Cell cell) {
133
+ Sheet sheet = cell.getSheet();
134
+ int r = cell.getRowIndex();
135
+ int c = cell.getColumnIndex();
136
+
137
+ return mergedRegionMap.get(sheet, r, c);
108
138
  }
109
139
 
110
140
  protected void visitCellValueFormula(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) {
111
- assert cell.getCellType() == Cell.CELL_TYPE_FORMULA;
141
+ assert cell.getCellTypeEnum() == CellType.FORMULA;
112
142
 
113
143
  FormulaHandling handling = bean.getFormulaHandling();
114
144
  switch (handling) {
@@ -124,24 +154,24 @@ public class PoiExcelCellValueVisitor {
124
154
  protected void visitCellValueFormulaCashedValue(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) {
125
155
  Column column = bean.getColumn();
126
156
 
127
- int cellType = cell.getCachedFormulaResultType();
157
+ CellType cellType = cell.getCachedFormulaResultTypeEnum();
128
158
  switch (cellType) {
129
- case Cell.CELL_TYPE_NUMERIC:
159
+ case NUMERIC:
130
160
  visitor.visitCellValueNumeric(column, cell, cell.getNumericCellValue());
131
161
  return;
132
- case Cell.CELL_TYPE_STRING:
162
+ case STRING:
133
163
  visitor.visitCellValueString(column, cell, cell.getStringCellValue());
134
164
  return;
135
- case Cell.CELL_TYPE_BLANK:
165
+ case BLANK:
136
166
  visitCellValueBlank(bean, cell, visitor);
137
167
  return;
138
- case Cell.CELL_TYPE_BOOLEAN:
168
+ case BOOLEAN:
139
169
  visitor.visitCellValueBoolean(column, cell, cell.getBooleanCellValue());
140
170
  return;
141
- case Cell.CELL_TYPE_ERROR:
171
+ case ERROR:
142
172
  visitCellValueError(bean, cell, cell.getErrorCellValue(), visitor);
143
173
  return;
144
- case Cell.CELL_TYPE_FORMULA:
174
+ case FORMULA:
145
175
  default:
146
176
  throw new IllegalStateException(MessageFormat.format("unsupported POI cellType={0}", cellType));
147
177
  }
@@ -198,24 +228,24 @@ public class PoiExcelCellValueVisitor {
198
228
  throw new RuntimeException(MessageFormat.format("evaluate error. formula={0}", cell.getCellFormula()), e);
199
229
  }
200
230
 
201
- int cellType = cellValue.getCellType();
231
+ CellType cellType = cellValue.getCellTypeEnum();
202
232
  switch (cellType) {
203
- case Cell.CELL_TYPE_NUMERIC:
233
+ case NUMERIC:
204
234
  visitor.visitCellValueNumeric(column, cellValue, cellValue.getNumberValue());
205
235
  return;
206
- case Cell.CELL_TYPE_STRING:
236
+ case STRING:
207
237
  visitor.visitCellValueString(column, cellValue, cellValue.getStringValue());
208
238
  return;
209
- case Cell.CELL_TYPE_BLANK:
239
+ case BLANK:
210
240
  visitor.visitCellValueBlank(column, cellValue);
211
241
  return;
212
- case Cell.CELL_TYPE_BOOLEAN:
242
+ case BOOLEAN:
213
243
  visitor.visitCellValueBoolean(column, cellValue, cellValue.getBooleanValue());
214
244
  return;
215
- case Cell.CELL_TYPE_ERROR:
245
+ case ERROR:
216
246
  visitCellValueError(bean, cellValue, cellValue.getErrorValue(), visitor);
217
247
  return;
218
- case Cell.CELL_TYPE_FORMULA:
248
+ case FORMULA:
219
249
  default:
220
250
  throw new IllegalStateException(MessageFormat.format("unsupported POI cellType={0}", cellType));
221
251
  }
@@ -40,7 +40,7 @@ public class PoiExcelClientAnchorVisitor extends AbstractPoiExcelCellAttributeVi
40
40
  map.put("anchor_type", new AttributeSupplier<ClientAnchor>() {
41
41
  @Override
42
42
  public Object get(Column column, Cell cell, ClientAnchor anchor) {
43
- return (long) anchor.getAnchorType();
43
+ return (long) anchor.getAnchorType().value;
44
44
  }
45
45
  });
46
46
  map.put("col1", new AttributeSupplier<ClientAnchor>() {
@@ -3,10 +3,13 @@ package org.embulk.parser.poi_excel.visitor;
3
3
  import java.text.MessageFormat;
4
4
 
5
5
  import org.apache.poi.ss.usermodel.Cell;
6
+ import org.apache.poi.ss.usermodel.CellType;
6
7
  import org.apache.poi.ss.usermodel.Row;
8
+ import org.apache.poi.ss.usermodel.Sheet;
7
9
  import org.apache.poi.ss.util.CellReference;
8
10
  import org.embulk.parser.poi_excel.PoiExcelColumnValueType;
9
11
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
12
+ import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress;
10
13
  import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor;
11
14
  import org.embulk.spi.Column;
12
15
  import org.embulk.spi.ColumnVisitor;
@@ -79,16 +82,44 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
79
82
  protected void visitCell(Column column, CellVisitor visitor) {
80
83
  PoiExcelColumnBean bean = visitorValue.getColumnBean(column);
81
84
  PoiExcelColumnValueType valueType = bean.getValueType();
85
+ PoiExcelCellAddress cellAddress = bean.getCellAddress();
82
86
 
83
87
  switch (valueType) {
84
88
  case SHEET_NAME:
85
- visitor.visitSheetName(column);
89
+ if (cellAddress != null) {
90
+ Sheet sheet = cellAddress.getSheet(currentRow);
91
+ visitor.visitSheetName(column, sheet);
92
+ } else {
93
+ visitor.visitSheetName(column);
94
+ }
86
95
  return;
87
96
  case ROW_NUMBER:
88
- visitor.visitRowNumber(column, currentRow.getRowNum() + 1);
97
+ int rowIndex;
98
+ if (cellAddress != null) {
99
+ Cell cell = cellAddress.getCell(currentRow);
100
+ if (cell == null) {
101
+ visitCellNull(column);
102
+ return;
103
+ }
104
+ rowIndex = cell.getRowIndex();
105
+ } else {
106
+ rowIndex = currentRow.getRowNum();
107
+ }
108
+ visitor.visitRowNumber(column, rowIndex + 1);
89
109
  return;
90
110
  case COLUMN_NUMBER:
91
- visitor.visitColumnNumber(column, bean.getColumnIndex() + 1);
111
+ int columnIndex;
112
+ if (cellAddress != null) {
113
+ Cell cell = cellAddress.getCell(currentRow);
114
+ if (cell == null) {
115
+ visitCellNull(column);
116
+ return;
117
+ }
118
+ columnIndex = cell.getColumnIndex();
119
+ } else {
120
+ columnIndex = bean.getColumnIndex();
121
+ }
122
+ visitor.visitColumnNumber(column, columnIndex + 1);
92
123
  return;
93
124
  case CONSTANT:
94
125
  visitCellConstant(column, bean.getValueTypeSuffix(), visitor);
@@ -98,7 +129,12 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
98
129
  }
99
130
 
100
131
  assert valueType.useCell();
101
- Cell cell = currentRow.getCell(bean.getColumnIndex());
132
+ Cell cell;
133
+ if (cellAddress != null) {
134
+ cell = cellAddress.getCell(currentRow);
135
+ } else {
136
+ cell = currentRow.getCell(bean.getColumnIndex());
137
+ }
102
138
  if (cell == null) {
103
139
  visitCellNull(column);
104
140
  return;
@@ -118,13 +154,13 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
118
154
  visitCellComment(bean, cell, visitor);
119
155
  return;
120
156
  case CELL_TYPE:
121
- visitCellType(bean, cell, cell.getCellType(), visitor);
157
+ visitCellType(bean, cell, cell.getCellTypeEnum(), visitor);
122
158
  return;
123
159
  case CELL_CACHED_TYPE:
124
- if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
125
- visitCellType(bean, cell, cell.getCachedFormulaResultType(), visitor);
160
+ if (cell.getCellTypeEnum() == CellType.FORMULA) {
161
+ visitCellType(bean, cell, cell.getCachedFormulaResultTypeEnum(), visitor);
126
162
  } else {
127
- visitCellType(bean, cell, cell.getCellType(), visitor);
163
+ visitCellType(bean, cell, cell.getCellTypeEnum(), visitor);
128
164
  }
129
165
  return;
130
166
  default:
@@ -164,7 +200,7 @@ public class PoiExcelColumnVisitor implements ColumnVisitor {
164
200
  delegator.visit(bean, cell, visitor);
165
201
  }
166
202
 
167
- private void visitCellType(PoiExcelColumnBean bean, Cell cell, int cellType, CellVisitor visitor) {
203
+ private void visitCellType(PoiExcelColumnBean bean, Cell cell, CellType cellType, CellVisitor visitor) {
168
204
  PoiExcelCellTypeVisitor delegator = factory.getPoiExcelCellTypeVisitor();
169
205
  delegator.visit(bean, cell, cellType, visitor);
170
206
  }
@@ -38,6 +38,11 @@ public class BooleanCellVisitor extends CellVisitor {
38
38
  @Override
39
39
  public void visitSheetName(Column column) {
40
40
  Sheet sheet = visitorValue.getSheet();
41
+ visitSheetName(column, sheet);
42
+ }
43
+
44
+ @Override
45
+ public void visitSheetName(Column column, Sheet sheet) {
41
46
  int index = sheet.getWorkbook().getSheetIndex(sheet);
42
47
  pageBuilder.setBoolean(column, index != 0);
43
48
  }
@@ -3,6 +3,7 @@ package org.embulk.parser.poi_excel.visitor.embulk;
3
3
  import java.text.MessageFormat;
4
4
 
5
5
  import org.apache.poi.ss.usermodel.Cell;
6
+ import org.apache.poi.ss.usermodel.Sheet;
6
7
  import org.embulk.config.ConfigException;
7
8
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
8
9
  import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.ErrorStrategy;
@@ -40,6 +41,8 @@ public abstract class CellVisitor {
40
41
 
41
42
  public abstract void visitSheetName(Column column);
42
43
 
44
+ public abstract void visitSheetName(Column column, Sheet sheet);
45
+
43
46
  public abstract void visitRowNumber(Column column, int index1);
44
47
 
45
48
  public abstract void visitColumnNumber(Column column, int index1);
@@ -45,6 +45,11 @@ public class DoubleCellVisitor extends CellVisitor {
45
45
  @Override
46
46
  public void visitSheetName(Column column) {
47
47
  Sheet sheet = visitorValue.getSheet();
48
+ visitSheetName(column, sheet);
49
+ }
50
+
51
+ @Override
52
+ public void visitSheetName(Column column, Sheet sheet) {
48
53
  int index = sheet.getWorkbook().getSheetIndex(sheet);
49
54
  pageBuilder.setDouble(column, index);
50
55
  }
@@ -45,6 +45,11 @@ public class LongCellVisitor extends CellVisitor {
45
45
  @Override
46
46
  public void visitSheetName(Column column) {
47
47
  Sheet sheet = visitorValue.getSheet();
48
+ visitSheetName(column, sheet);
49
+ }
50
+
51
+ @Override
52
+ public void visitSheetName(Column column, Sheet sheet) {
48
53
  int index = sheet.getWorkbook().getSheetIndex(sheet);
49
54
  pageBuilder.setLong(column, index);
50
55
  }
@@ -70,6 +70,11 @@ public class StringCellVisitor extends CellVisitor {
70
70
  @Override
71
71
  public void visitSheetName(Column column) {
72
72
  Sheet sheet = visitorValue.getSheet();
73
+ visitSheetName(column, sheet);
74
+ }
75
+
76
+ @Override
77
+ public void visitSheetName(Column column, Sheet sheet) {
73
78
  pageBuilder.setString(column, sheet.getSheetName());
74
79
  }
75
80
 
@@ -61,6 +61,11 @@ public class TimestampCellVisitor extends CellVisitor {
61
61
  @Override
62
62
  public void visitSheetName(Column column) {
63
63
  Sheet sheet = visitorValue.getSheet();
64
+ visitSheetName(column, sheet);
65
+ }
66
+
67
+ @Override
68
+ public void visitSheetName(Column column, Sheet sheet) {
64
69
  doConvertError(column, sheet.getSheetName(), new UnsupportedOperationException(
65
70
  "unsupported conversion sheet_name to Embulk timestamp"));
66
71
  }
@@ -0,0 +1,51 @@
1
+ package org.embulk.parser.poi_excel.visitor.util;
2
+
3
+ import java.util.Map;
4
+ import java.util.TreeMap;
5
+ import java.util.concurrent.ConcurrentHashMap;
6
+
7
+ import org.apache.poi.ss.usermodel.Sheet;
8
+ import org.apache.poi.ss.util.CellRangeAddress;
9
+
10
+ public class MergedRegionMap {
11
+
12
+ private final Map<Sheet, Map<Integer, Map<Integer, CellRangeAddress>>> sheetMap = new ConcurrentHashMap<>();
13
+
14
+ public CellRangeAddress get(Sheet sheet, int rowIndex, int columnIndex) {
15
+ Map<Integer, Map<Integer, CellRangeAddress>> rowMap = sheetMap.get(sheet);
16
+ if (rowMap == null) {
17
+ synchronized (sheet) {
18
+ rowMap = createRowMap(sheet);
19
+ sheetMap.put(sheet, rowMap);
20
+ }
21
+ }
22
+
23
+ Map<Integer, CellRangeAddress> columnMap = rowMap.get(rowIndex);
24
+ if (columnMap == null) {
25
+ return null;
26
+ }
27
+ return columnMap.get(columnIndex);
28
+ }
29
+
30
+ protected Map<Integer, Map<Integer, CellRangeAddress>> createRowMap(Sheet sheet) {
31
+ Map<Integer, Map<Integer, CellRangeAddress>> rowMap = new TreeMap<>();
32
+
33
+ for (int i = sheet.getNumMergedRegions() - 1; i >= 0; i--) {
34
+ CellRangeAddress region = sheet.getMergedRegion(i);
35
+
36
+ for (int r = region.getFirstRow(); r <= region.getLastRow(); r++) {
37
+ Map<Integer, CellRangeAddress> columnMap = rowMap.get(r);
38
+ if (columnMap == null) {
39
+ columnMap = new TreeMap<>();
40
+ rowMap.put(r, columnMap);
41
+ }
42
+
43
+ for (int c = region.getFirstColumn(); c <= region.getLastColumn(); c++) {
44
+ columnMap.put(c, region);
45
+ }
46
+ }
47
+ }
48
+
49
+ return rowMap;
50
+ }
51
+ }
@@ -1,7 +1,7 @@
1
1
  package org.embulk.parser.poi_excel;
2
2
 
3
3
  import static org.hamcrest.CoreMatchers.is;
4
- import static org.junit.Assert.assertThat;
4
+ import static org.hamcrest.MatcherAssert.assertThat;
5
5
 
6
6
  import java.net.URL;
7
7
  import java.text.ParseException;
@@ -139,56 +139,6 @@ public class TestPoiExcelParserPlugin {
139
139
  assertThat(r.getAsString("col-s"), is("A"));
140
140
  }
141
141
 
142
- @Theory
143
- public void testSearchMergedCell_true(String excelFile) throws ParseException {
144
- try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
145
- tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
146
-
147
- EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
148
- parser.set("sheet", "merged_cell");
149
- parser.addColumn("a", "string");
150
- parser.addColumn("b", "string");
151
-
152
- URL inFile = getClass().getResource(excelFile);
153
- List<OutputRecord> result = tester.runParser(inFile, parser);
154
-
155
- assertThat(result.size(), is(4));
156
- check6(result, 0, "test3-a1", "test3-a1");
157
- check6(result, 1, "data", "0");
158
- check6(result, 2, null, null);
159
- check6(result, 3, null, null);
160
- }
161
- }
162
-
163
- @Theory
164
- public void testSearchMergedCell_false(String excelFile) throws ParseException {
165
- try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
166
- tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
167
-
168
- EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
169
- parser.set("sheet", "merged_cell");
170
- parser.set("search_merged_cell", false);
171
- parser.addColumn("a", "string");
172
- parser.addColumn("b", "string");
173
-
174
- URL inFile = getClass().getResource(excelFile);
175
- List<OutputRecord> result = tester.runParser(inFile, parser);
176
-
177
- assertThat(result.size(), is(4));
178
- check6(result, 0, "test3-a1", null);
179
- check6(result, 1, "data", "0");
180
- check6(result, 2, null, null);
181
- check6(result, 3, null, null);
182
- }
183
- }
184
-
185
- private void check6(List<OutputRecord> result, int index, String a, String b) {
186
- OutputRecord r = result.get(index);
187
- // System.out.println(r);
188
- assertThat(r.getAsString("a"), is(a));
189
- assertThat(r.getAsString("b"), is(b));
190
- }
191
-
192
142
  @Theory
193
143
  public void test_sheets(String excelFile) throws ParseException {
194
144
  try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
@@ -0,0 +1,69 @@
1
+ package org.embulk.parser.poi_excel;
2
+
3
+ import static org.hamcrest.CoreMatchers.is;
4
+ import static org.hamcrest.MatcherAssert.assertThat;
5
+
6
+ import java.net.URL;
7
+ import java.text.ParseException;
8
+ import java.util.List;
9
+
10
+ import org.embulk.parser.EmbulkPluginTester;
11
+ import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
12
+ import org.embulk.parser.EmbulkTestParserConfig;
13
+ import org.junit.experimental.theories.DataPoints;
14
+ import org.junit.experimental.theories.Theories;
15
+ import org.junit.experimental.theories.Theory;
16
+ import org.junit.runner.RunWith;
17
+
18
+ @RunWith(Theories.class)
19
+ public class TestPoiExcelParserPlugin_cellAddress {
20
+
21
+ @DataPoints
22
+ public static String[] FILES = { "test1.xls", "test2.xlsx" };
23
+
24
+ @Theory
25
+ public void testCellAddress(String excelFile) throws ParseException {
26
+ try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
27
+ tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
28
+
29
+ EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
30
+ parser.set("sheet", "test1");
31
+ parser.set("skip_header_lines", 1);
32
+ parser.addColumn("text", "string").set("column_number", "D");
33
+ parser.addColumn("fix_value", "string").set("cell_address", "B1").set("value", "cell_value");
34
+ parser.addColumn("fix_sheet", "string").set("cell_address", "B1").set("value", "sheet_name");
35
+ parser.addColumn("fix_row", "long").set("cell_address", "B1").set("value", "row_number");
36
+ parser.addColumn("fix_col", "long").set("cell_address", "B1").set("value", "column_number");
37
+ parser.addColumn("other_sheet_value", "string").set("cell_address", "style!B5").set("value", "cell_value");
38
+ parser.addColumn("other_sheet_name", "string").set("cell_address", "style!B5").set("value", "sheet_name");
39
+ parser.addColumn("other_sheet_row", "long").set("cell_address", "style!B5").set("value", "row_number");
40
+ parser.addColumn("other_sheet_col", "string").set("cell_address", "style!B5").set("value", "column_number");
41
+
42
+ URL inFile = getClass().getResource(excelFile);
43
+ List<OutputRecord> result = tester.runParser(inFile, parser);
44
+
45
+ assertThat(result.size(), is(7));
46
+ check1(result, 0, "abc");
47
+ check1(result, 1, "def");
48
+ check1(result, 2, "456");
49
+ check1(result, 3, "abc");
50
+ check1(result, 4, "abc");
51
+ check1(result, 5, "true");
52
+ check1(result, 6, null);
53
+ }
54
+ }
55
+
56
+ private void check1(List<OutputRecord> result, int index, String text) {
57
+ OutputRecord record = result.get(index);
58
+ // System.out.println(record);
59
+ assertThat(record.getAsString("text"), is(text));
60
+ assertThat(record.getAsString("fix_value"), is("long"));
61
+ assertThat(record.getAsString("fix_sheet"), is("test1"));
62
+ assertThat(record.getAsLong("fix_row"), is(1L));
63
+ assertThat(record.getAsLong("fix_col"), is(2L));
64
+ assertThat(record.getAsString("other_sheet_value"), is("bottom"));
65
+ assertThat(record.getAsString("other_sheet_name"), is("style"));
66
+ assertThat(record.getAsLong("other_sheet_row"), is(5L));
67
+ assertThat(record.getAsString("other_sheet_col"), is("B"));
68
+ }
69
+ }
@@ -2,7 +2,7 @@ package org.embulk.parser.poi_excel;
2
2
 
3
3
  import static org.hamcrest.CoreMatchers.is;
4
4
  import static org.hamcrest.CoreMatchers.nullValue;
5
- import static org.junit.Assert.assertThat;
5
+ import static org.hamcrest.MatcherAssert.assertThat;
6
6
  import static org.junit.Assert.fail;
7
7
 
8
8
  import java.net.URL;
@@ -2,7 +2,7 @@ package org.embulk.parser.poi_excel;
2
2
 
3
3
  import static org.hamcrest.CoreMatchers.is;
4
4
  import static org.hamcrest.CoreMatchers.nullValue;
5
- import static org.junit.Assert.assertThat;
5
+ import static org.hamcrest.MatcherAssert.assertThat;
6
6
  import static org.junit.Assert.fail;
7
7
 
8
8
  import java.net.URL;
@@ -2,7 +2,7 @@ package org.embulk.parser.poi_excel;
2
2
 
3
3
  import static org.hamcrest.CoreMatchers.is;
4
4
  import static org.hamcrest.CoreMatchers.nullValue;
5
- import static org.junit.Assert.assertThat;
5
+ import static org.hamcrest.MatcherAssert.assertThat;
6
6
  import static org.junit.Assert.fail;
7
7
 
8
8
  import java.net.URL;