embulk-parser-poi_excel 0.1.5 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +80 -21
  3. data/build.gradle +21 -11
  4. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  5. data/gradle/wrapper/gradle-wrapper.properties +5 -6
  6. data/gradlew +43 -35
  7. data/gradlew.bat +4 -10
  8. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +25 -3
  9. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +102 -11
  10. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +132 -6
  11. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +167 -47
  12. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java +13 -1
  13. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecord.java +52 -0
  14. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordColumn.java +80 -0
  15. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordRow.java +76 -0
  16. data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java +49 -0
  17. data/src/main/java/org/embulk/parser/poi_excel/bean/record/RecordType.java +114 -0
  18. data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +59 -0
  19. data/src/main/java/org/embulk/parser/poi_excel/bean/util/SearchMergedCell.java +71 -0
  20. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +0 -6
  21. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +11 -11
  22. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +52 -0
  23. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +87 -41
  24. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java +1 -1
  25. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +60 -12
  26. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +14 -0
  27. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +5 -0
  28. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +3 -0
  29. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +5 -0
  30. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +5 -0
  31. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +30 -2
  32. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +5 -0
  33. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionFinder.java +9 -0
  34. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionList.java +20 -0
  35. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java +55 -0
  36. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionNothing.java +12 -0
  37. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +27 -79
  38. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java +69 -0
  39. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +1 -1
  40. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java +1 -1
  41. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +1 -1
  42. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +14 -14
  43. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +79 -0
  44. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +1 -1
  45. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java +1 -1
  46. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java +1 -1
  47. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java +90 -0
  48. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java +94 -0
  49. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_recordType.java +192 -0
  50. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +35 -1
  51. metadata +36 -17
@@ -11,6 +11,7 @@ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
11
11
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
12
12
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask;
13
13
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask;
14
+ import org.embulk.parser.poi_excel.bean.record.RecordType;
14
15
  import org.embulk.spi.Column;
15
16
  import org.embulk.spi.ColumnConfig;
16
17
  import org.embulk.spi.Schema;
@@ -75,13 +76,24 @@ public class PoiExcelSheetBean {
75
76
  columnBeanList.add(bean);
76
77
  }
77
78
 
78
- new PoiExcelColumnIndex().initializeColumnIndex(task, columnBeanList);
79
+ new PoiExcelColumnIndex(this).initializeColumnIndex(task, columnBeanList);
79
80
  }
80
81
 
81
82
  public final List<SheetCommonOptionTask> getSheetOption() {
82
83
  return sheetTaskList;
83
84
  }
84
85
 
86
+ public RecordType getRecordType() {
87
+ List<SheetCommonOptionTask> list = getSheetOption();
88
+ for (SheetCommonOptionTask sheetTask : list) {
89
+ Optional<String> value = sheetTask.getRecordType();
90
+ if (value.isPresent()) {
91
+ return RecordType.of(value.get());
92
+ }
93
+ }
94
+ return RecordType.ROW;
95
+ }
96
+
85
97
  public int getSkipHeaderLines() {
86
98
  List<SheetCommonOptionTask> list = getSheetOption();
87
99
  for (SheetCommonOptionTask sheetTask : list) {
@@ -0,0 +1,52 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.apache.poi.ss.usermodel.Sheet;
5
+ import org.apache.poi.ss.util.CellReference;
6
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
7
+
8
+ public abstract class PoiExcelRecord {
9
+
10
+ // loop record
11
+
12
+ private Sheet sheet;
13
+
14
+ public final void initialize(Sheet sheet, int skipHeaderLines) {
15
+ this.sheet = sheet;
16
+ initializeLoop(skipHeaderLines);
17
+ }
18
+
19
+ protected abstract void initializeLoop(int skipHeaderLines);
20
+
21
+ public final Sheet getSheet() {
22
+ return sheet;
23
+ }
24
+
25
+ public abstract boolean exists();
26
+
27
+ public abstract void moveNext();
28
+
29
+ // current record
30
+
31
+ public final void logStart() {
32
+ logStartEnd("start");
33
+ }
34
+
35
+ public final void logEnd() {
36
+ logStartEnd("end");
37
+ }
38
+
39
+ protected abstract void logStartEnd(String part);
40
+
41
+ public abstract int getRowIndex(PoiExcelColumnBean bean);
42
+
43
+ public abstract int getColumnIndex(PoiExcelColumnBean bean);
44
+
45
+ public abstract Cell getCell(PoiExcelColumnBean bean);
46
+
47
+ public CellReference getCellReference(PoiExcelColumnBean bean) {
48
+ int rowIndex = getRowIndex(bean);
49
+ int columnIndex = getColumnIndex(bean);
50
+ return new CellReference(rowIndex, columnIndex);
51
+ }
52
+ }
@@ -0,0 +1,80 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.apache.poi.ss.usermodel.Row;
5
+ import org.apache.poi.ss.usermodel.Sheet;
6
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
7
+ import org.embulk.spi.Exec;
8
+ import org.slf4j.Logger;
9
+
10
+ public class PoiExcelRecordColumn extends PoiExcelRecord {
11
+ private final Logger log = Exec.getLogger(getClass());
12
+
13
+ private int maxColumnIndex;
14
+ private int currentColumnIndex;
15
+
16
+ @Override
17
+ protected void initializeLoop(int skipHeaderLines) {
18
+ int minColumnIndex = Integer.MAX_VALUE;
19
+ maxColumnIndex = 0;
20
+ Sheet sheet = getSheet();
21
+ for (Row row : sheet) {
22
+ int firstIndex = row.getFirstCellNum();
23
+ if (firstIndex >= 0) {
24
+ minColumnIndex = Math.min(minColumnIndex, firstIndex);
25
+ }
26
+ maxColumnIndex = Math.max(maxColumnIndex, row.getLastCellNum());
27
+ }
28
+
29
+ this.currentColumnIndex = maxColumnIndex;
30
+ for (int i = minColumnIndex; i < maxColumnIndex; i++) {
31
+ if (i < skipHeaderLines) {
32
+ if (log.isDebugEnabled()) {
33
+ log.debug("column({}) skipped", i);
34
+ }
35
+ continue;
36
+ }
37
+
38
+ this.currentColumnIndex = i;
39
+ break;
40
+ }
41
+ }
42
+
43
+ @Override
44
+ public boolean exists() {
45
+ return currentColumnIndex < maxColumnIndex;
46
+ }
47
+
48
+ @Override
49
+ public void moveNext() {
50
+ currentColumnIndex++;
51
+ }
52
+
53
+ @Override
54
+ protected void logStartEnd(String part) {
55
+ if (log.isDebugEnabled()) {
56
+ log.debug("column({}) {}", currentColumnIndex, part);
57
+ }
58
+ }
59
+
60
+ @Override
61
+ public int getRowIndex(PoiExcelColumnBean bean) {
62
+ return bean.getColumnIndex();
63
+ }
64
+
65
+ @Override
66
+ public int getColumnIndex(PoiExcelColumnBean bean) {
67
+ return currentColumnIndex;
68
+ }
69
+
70
+ @Override
71
+ public Cell getCell(PoiExcelColumnBean bean) {
72
+ int rowIndex = getRowIndex(bean);
73
+ Row row = getSheet().getRow(rowIndex);
74
+ if (row == null) {
75
+ return null;
76
+ }
77
+ int columnIndex = getColumnIndex(bean);
78
+ return row.getCell(columnIndex);
79
+ }
80
+ }
@@ -0,0 +1,76 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import java.util.Iterator;
4
+
5
+ import org.apache.poi.ss.usermodel.Cell;
6
+ import org.apache.poi.ss.usermodel.Row;
7
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
8
+ import org.embulk.spi.Exec;
9
+ import org.slf4j.Logger;
10
+
11
+ public class PoiExcelRecordRow extends PoiExcelRecord {
12
+ private final Logger log = Exec.getLogger(getClass());
13
+
14
+ private Iterator<Row> rowIterator;
15
+ private Row currentRow;
16
+
17
+ @Override
18
+ protected void initializeLoop(int skipHeaderLines) {
19
+ this.rowIterator = getSheet().iterator();
20
+ this.currentRow = null;
21
+ while (rowIterator.hasNext()) {
22
+ Row row = rowIterator.next();
23
+
24
+ int rowIndex = row.getRowNum();
25
+ if (rowIndex < skipHeaderLines) {
26
+ if (log.isDebugEnabled()) {
27
+ log.debug("row({}) skipped", rowIndex);
28
+ }
29
+ continue;
30
+ }
31
+
32
+ this.currentRow = row;
33
+ break;
34
+ }
35
+ }
36
+
37
+ @Override
38
+ public boolean exists() {
39
+ return currentRow != null;
40
+ }
41
+
42
+ @Override
43
+ public void moveNext() {
44
+ if (rowIterator.hasNext()) {
45
+ this.currentRow = rowIterator.next();
46
+ } else {
47
+ this.currentRow = null;
48
+ }
49
+ }
50
+
51
+ @Override
52
+ protected void logStartEnd(String part) {
53
+ assert currentRow != null;
54
+ if (log.isDebugEnabled()) {
55
+ log.debug("row({}) {}", currentRow.getRowNum(), part);
56
+ }
57
+ }
58
+
59
+ @Override
60
+ public int getRowIndex(PoiExcelColumnBean bean) {
61
+ assert currentRow != null;
62
+ return currentRow.getRowNum();
63
+ }
64
+
65
+ @Override
66
+ public int getColumnIndex(PoiExcelColumnBean bean) {
67
+ return bean.getColumnIndex();
68
+ }
69
+
70
+ @Override
71
+ public Cell getCell(PoiExcelColumnBean bean) {
72
+ assert currentRow != null;
73
+ int columnIndex = getColumnIndex(bean);
74
+ return currentRow.getCell(columnIndex);
75
+ }
76
+ }
@@ -0,0 +1,49 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import org.apache.poi.ss.usermodel.Cell;
4
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
5
+ import org.embulk.spi.Exec;
6
+ import org.slf4j.Logger;
7
+
8
+ public class PoiExcelRecordSheet extends PoiExcelRecord {
9
+ private final Logger log = Exec.getLogger(getClass());
10
+
11
+ private boolean exists;
12
+
13
+ @Override
14
+ protected void initializeLoop(int skipHeaderLines) {
15
+ this.exists = true;
16
+ }
17
+
18
+ @Override
19
+ public boolean exists() {
20
+ return exists;
21
+ }
22
+
23
+ @Override
24
+ public void moveNext() {
25
+ this.exists = false;
26
+ }
27
+
28
+ @Override
29
+ protected void logStartEnd(String part) {
30
+ if (log.isDebugEnabled()) {
31
+ log.debug("sheet({}) {}", getSheet().getSheetName(), part);
32
+ }
33
+ }
34
+
35
+ @Override
36
+ public int getRowIndex(PoiExcelColumnBean bean) {
37
+ throw new UnsupportedOperationException("unsupported at record_type=sheet");
38
+ }
39
+
40
+ @Override
41
+ public int getColumnIndex(PoiExcelColumnBean bean) {
42
+ throw new UnsupportedOperationException("unsupported at record_type=sheet");
43
+ }
44
+
45
+ @Override
46
+ public Cell getCell(PoiExcelColumnBean bean) {
47
+ throw new UnsupportedOperationException("unsupported at record_type=sheet");
48
+ }
49
+ }
@@ -0,0 +1,114 @@
1
+ package org.embulk.parser.poi_excel.bean.record;
2
+
3
+ import java.text.MessageFormat;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
9
+ import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean;
10
+
11
+ import com.google.common.base.Optional;
12
+
13
+ public enum RecordType {
14
+ ROW {
15
+ @Override
16
+ public Optional<String> getRecordOption(PoiExcelColumnBean bean) {
17
+ return bean.getRowNumber();
18
+ }
19
+
20
+ @Override
21
+ public String getRecordOptionName() {
22
+ return ColumnOptionTask.CELL_ROW;
23
+ }
24
+
25
+ @Override
26
+ public Optional<String> getNumberOption(PoiExcelColumnBean bean) {
27
+ return bean.getColumnNumber();
28
+ }
29
+
30
+ @Override
31
+ public String getNumberOptionName() {
32
+ return ColumnOptionTask.CELL_COLUMN;
33
+ }
34
+
35
+ @Override
36
+ public PoiExcelRecord newPoiExcelRecord() {
37
+ return new PoiExcelRecordRow();
38
+ }
39
+ },
40
+ COLUMN {
41
+ @Override
42
+ public Optional<String> getRecordOption(PoiExcelColumnBean bean) {
43
+ return bean.getColumnNumber();
44
+ }
45
+
46
+ @Override
47
+ public String getRecordOptionName() {
48
+ return ColumnOptionTask.CELL_COLUMN;
49
+ }
50
+
51
+ @Override
52
+ public Optional<String> getNumberOption(PoiExcelColumnBean bean) {
53
+ return bean.getRowNumber();
54
+ }
55
+
56
+ @Override
57
+ public String getNumberOptionName() {
58
+ return ColumnOptionTask.CELL_ROW;
59
+ }
60
+
61
+ @Override
62
+ public PoiExcelRecord newPoiExcelRecord() {
63
+ return new PoiExcelRecordColumn();
64
+ }
65
+ },
66
+ SHEET {
67
+ @Override
68
+ public Optional<String> getRecordOption(PoiExcelColumnBean bean) {
69
+ return Optional.absent();
70
+ }
71
+
72
+ @Override
73
+ public String getRecordOptionName() {
74
+ return "-";
75
+ }
76
+
77
+ @Override
78
+ public Optional<String> getNumberOption(PoiExcelColumnBean bean) {
79
+ return Optional.absent();
80
+ }
81
+
82
+ @Override
83
+ public String getNumberOptionName() {
84
+ return "-";
85
+ }
86
+
87
+ @Override
88
+ public PoiExcelRecord newPoiExcelRecord() {
89
+ return new PoiExcelRecordSheet();
90
+ }
91
+ };
92
+
93
+ public abstract Optional<String> getRecordOption(PoiExcelColumnBean bean);
94
+
95
+ public abstract String getRecordOptionName();
96
+
97
+ public abstract Optional<String> getNumberOption(PoiExcelColumnBean bean);
98
+
99
+ public abstract String getNumberOptionName();
100
+
101
+ public abstract PoiExcelRecord newPoiExcelRecord();
102
+
103
+ public static RecordType of(String value) {
104
+ try {
105
+ return RecordType.valueOf(value.toUpperCase());
106
+ } catch (Exception e) {
107
+ List<String> list = new ArrayList<>();
108
+ for (RecordType s : RecordType.values()) {
109
+ list.add(s.name().toLowerCase());
110
+ }
111
+ throw new ConfigException(MessageFormat.format("illegal record_type={0}. expected={1}", value, list), e);
112
+ }
113
+ }
114
+ }
@@ -0,0 +1,59 @@
1
+ package org.embulk.parser.poi_excel.bean.util;
2
+
3
+ import java.text.MessageFormat;
4
+
5
+ import org.apache.poi.ss.usermodel.Cell;
6
+ import org.apache.poi.ss.usermodel.Row;
7
+ import org.apache.poi.ss.usermodel.Sheet;
8
+ import org.apache.poi.ss.usermodel.Workbook;
9
+ import org.apache.poi.ss.util.CellReference;
10
+ import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord;
11
+
12
+ public class PoiExcelCellAddress {
13
+ private final CellReference cellReference;
14
+
15
+ public PoiExcelCellAddress(CellReference cellReference) {
16
+ this.cellReference = cellReference;
17
+ }
18
+
19
+ public String getSheetName() {
20
+ return cellReference.getSheetName();
21
+ }
22
+
23
+ public Sheet getSheet(PoiExcelRecord record) {
24
+ String sheetName = getSheetName();
25
+ if (sheetName != null) {
26
+ Workbook book = record.getSheet().getWorkbook();
27
+ Sheet sheet = book.getSheet(sheetName);
28
+ if (sheet == null) {
29
+ throw new RuntimeException(MessageFormat.format("not found sheet. sheetName={0}", sheetName));
30
+ }
31
+ return sheet;
32
+ } else {
33
+ return record.getSheet();
34
+ }
35
+ }
36
+
37
+ public int getRowIndex() {
38
+ return cellReference.getRow();
39
+ }
40
+
41
+ public int getColumnIndex() {
42
+ return cellReference.getCol();
43
+ }
44
+
45
+ public Cell getCell(PoiExcelRecord record) {
46
+ Sheet sheet = getSheet(record);
47
+
48
+ Row row = sheet.getRow(getRowIndex());
49
+ if (row == null) {
50
+ return null;
51
+ }
52
+
53
+ return row.getCell(getColumnIndex());
54
+ }
55
+
56
+ public String getString() {
57
+ return cellReference.formatAsString();
58
+ }
59
+ }