embulk-parser-poi_excel 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +10 -3
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java +13 -11
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +85 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fc1f7535c2d5dd180ad60a729c0daccab15144c
|
4
|
+
data.tar.gz: 2c09c8ffa12e314ff1af31dd0c2bab4c02d00057
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0236a3b74bee6392af244631640fd8fe317fa42728a690eecfe4ef11b149fa5c5f9237a334af0d757008aabfff7040c9787fef363b9a84a67c799eff5148e328
|
7
|
+
data.tar.gz: df386175cd7cbc2be51d70682f21d62581f527c040dfeaeac86def0ddff0b62297d58fcd3920cbd5ae4be636e49ae7ecd3f08de15e28c2930d7b4313140af6bb
|
data/README.md
CHANGED
@@ -13,6 +13,7 @@ This plugin uses Apache POI.
|
|
13
13
|
* **sheets**: sheet name. (list of string, required)
|
14
14
|
* **skip_header_lines**: skip rows. (integer, default: `0`)
|
15
15
|
* **columns**: column definition. see below. (hash, required)
|
16
|
+
* **sheet_options**: sheet option. see below. (hash, default: null)
|
16
17
|
|
17
18
|
### columns
|
18
19
|
|
@@ -115,6 +116,38 @@ Processing method of convert error. ex) Excel boolean to Embulk timestamp
|
|
115
116
|
* `exception`: throw exception. (default)
|
116
117
|
|
117
118
|
|
119
|
+
### sheet_options
|
120
|
+
|
121
|
+
Options for individual sheets.
|
122
|
+
|
123
|
+
```yaml
|
124
|
+
parser:
|
125
|
+
type: poi_excel
|
126
|
+
sheets: [Sheet1, Sheet2]
|
127
|
+
columns:
|
128
|
+
- {name: date, type: timestamp, column_number: A}
|
129
|
+
- {name: foo, type: string}
|
130
|
+
- {name: bar, type: long}
|
131
|
+
sheet_options:
|
132
|
+
Sheet1:
|
133
|
+
skip_header_lines: 1
|
134
|
+
columns:
|
135
|
+
foo: {column_number: B}
|
136
|
+
bar: {column_number: C}
|
137
|
+
Sheet2:
|
138
|
+
skip_header_lines: 0
|
139
|
+
columns:
|
140
|
+
foo: {column_number: D}
|
141
|
+
bar: {value: constant.0}
|
142
|
+
```
|
143
|
+
|
144
|
+
*sheet_options* is a map keyed by sheet name.
|
145
|
+
Map values are *skip_header_lines* and *columns*.
|
146
|
+
|
147
|
+
*columns* is a map keyed by column name.
|
148
|
+
Map values are the same options as *columns* in *parser*.
|
149
|
+
|
150
|
+
|
118
151
|
## Example
|
119
152
|
|
120
153
|
```yaml
|
@@ -148,5 +181,6 @@ $ embulk gem install embulk-parser-poi_excel
|
|
148
181
|
## Build
|
149
182
|
|
150
183
|
```
|
184
|
+
$ ./gradlew test
|
151
185
|
$ ./gradlew package
|
152
186
|
```
|
data/build.gradle
CHANGED
@@ -39,7 +39,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
39
39
|
|
40
40
|
public static final String TYPE = "poi_excel";
|
41
41
|
|
42
|
-
public interface PluginTask extends Task, TimestampParser.Task,
|
42
|
+
public interface PluginTask extends Task, TimestampParser.Task, SheetCommonOptionTask {
|
43
43
|
@Config("sheet")
|
44
44
|
@ConfigDefault("null")
|
45
45
|
public Optional<String> getSheet();
|
@@ -56,19 +56,26 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
56
56
|
@ConfigDefault("{}")
|
57
57
|
public Map<String, SheetOptionTask> getSheetOptions();
|
58
58
|
|
59
|
+
@Config("columns")
|
60
|
+
public SchemaConfig getColumns();
|
61
|
+
|
59
62
|
@Config("flush_count")
|
60
63
|
@ConfigDefault("100")
|
61
64
|
public int getFlushCount();
|
62
65
|
}
|
63
66
|
|
64
|
-
public interface
|
67
|
+
public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask {
|
65
68
|
|
66
69
|
@Config("skip_header_lines")
|
67
70
|
@ConfigDefault("null")
|
68
71
|
public Optional<Integer> getSkipHeaderLines();
|
72
|
+
}
|
73
|
+
|
74
|
+
public interface SheetOptionTask extends Task, SheetCommonOptionTask {
|
69
75
|
|
70
76
|
@Config("columns")
|
71
|
-
|
77
|
+
@ConfigDefault("null")
|
78
|
+
public Optional<Map<String, ColumnOptionTask>> getColumns();
|
72
79
|
}
|
73
80
|
|
74
81
|
public interface ColumnOptionTask extends Task, ColumnCommonOptionTask {
|
@@ -9,6 +9,7 @@ import java.util.Map.Entry;
|
|
9
9
|
import org.apache.poi.ss.usermodel.Sheet;
|
10
10
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
|
11
11
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
|
12
|
+
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask;
|
12
13
|
import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask;
|
13
14
|
import org.embulk.spi.Column;
|
14
15
|
import org.embulk.spi.ColumnConfig;
|
@@ -20,7 +21,7 @@ public class PoiExcelSheetBean {
|
|
20
21
|
|
21
22
|
protected final Sheet sheet;
|
22
23
|
|
23
|
-
private final List<
|
24
|
+
private final List<SheetCommonOptionTask> sheetTaskList = new ArrayList<>(2);
|
24
25
|
|
25
26
|
private final List<PoiExcelColumnBean> columnBeanList = new ArrayList<>();
|
26
27
|
|
@@ -55,13 +56,14 @@ public class PoiExcelSheetBean {
|
|
55
56
|
List<ColumnConfig> list = task.getColumns().getColumns();
|
56
57
|
|
57
58
|
Map<String, ColumnOptionTask> map = new HashMap<>();
|
58
|
-
List<
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
String
|
63
|
-
|
64
|
-
|
59
|
+
List<SheetCommonOptionTask> slist = getSheetOption();
|
60
|
+
for (int i = slist.size() - 1; i >= 0; i--) {
|
61
|
+
SheetCommonOptionTask s = slist.get(i);
|
62
|
+
if (s instanceof SheetOptionTask) {
|
63
|
+
Optional<Map<String, ColumnOptionTask>> option = ((SheetOptionTask) s).getColumns();
|
64
|
+
if (option.isPresent()) {
|
65
|
+
map.putAll(option.get());
|
66
|
+
}
|
65
67
|
}
|
66
68
|
}
|
67
69
|
|
@@ -76,13 +78,13 @@ public class PoiExcelSheetBean {
|
|
76
78
|
new PoiExcelColumnIndex().initializeColumnIndex(task, columnBeanList);
|
77
79
|
}
|
78
80
|
|
79
|
-
public final List<
|
81
|
+
public final List<SheetCommonOptionTask> getSheetOption() {
|
80
82
|
return sheetTaskList;
|
81
83
|
}
|
82
84
|
|
83
85
|
public int getSkipHeaderLines() {
|
84
|
-
List<
|
85
|
-
for (
|
86
|
+
List<SheetCommonOptionTask> list = getSheetOption();
|
87
|
+
for (SheetCommonOptionTask sheetTask : list) {
|
86
88
|
Optional<Integer> value = sheetTask.getSkipHeaderLines();
|
87
89
|
if (value.isPresent()) {
|
88
90
|
return value.get();
|
@@ -0,0 +1,85 @@
|
|
1
|
+
package org.embulk.parser.poi_excel;
|
2
|
+
|
3
|
+
import static org.hamcrest.CoreMatchers.is;
|
4
|
+
import static org.junit.Assert.assertThat;
|
5
|
+
|
6
|
+
import java.net.URL;
|
7
|
+
import java.text.ParseException;
|
8
|
+
import java.util.Arrays;
|
9
|
+
import java.util.HashMap;
|
10
|
+
import java.util.List;
|
11
|
+
import java.util.Map;
|
12
|
+
|
13
|
+
import org.embulk.parser.EmbulkPluginTester;
|
14
|
+
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
|
15
|
+
import org.embulk.parser.EmbulkTestParserConfig;
|
16
|
+
import org.junit.Test;
|
17
|
+
|
18
|
+
public class TestPoiExcelParserPlugin_sheets {
|
19
|
+
|
20
|
+
@Test
|
21
|
+
public void testSheets() throws ParseException {
|
22
|
+
try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
|
23
|
+
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
|
24
|
+
|
25
|
+
EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
|
26
|
+
parser.set("sheets", Arrays.asList("test1", "formula_replace", "style"));
|
27
|
+
parser.addColumn("text", "string");
|
28
|
+
parser.addColumn("number", "long");
|
29
|
+
|
30
|
+
Map<String, Object> sheetOptions = new HashMap<>();
|
31
|
+
{
|
32
|
+
Map<String, Object> sheet = new HashMap<>();
|
33
|
+
sheet.put("skip_header_lines", "5");
|
34
|
+
Map<String, Object> columns = new HashMap<>();
|
35
|
+
columns.put("text", newMap("column_number", "D"));
|
36
|
+
columns.put("number", newMap("column_number", "B"));
|
37
|
+
sheet.put("columns", columns);
|
38
|
+
sheetOptions.put("test1", sheet);
|
39
|
+
}
|
40
|
+
{
|
41
|
+
Map<String, Object> sheet = new HashMap<>();
|
42
|
+
Map<String, Object> columns = new HashMap<>();
|
43
|
+
columns.put("number", newMap("value", "constant.0"));
|
44
|
+
sheet.put("columns", columns);
|
45
|
+
sheetOptions.put("formula_replace", sheet);
|
46
|
+
}
|
47
|
+
{
|
48
|
+
Map<String, Object> sheet = new HashMap<>();
|
49
|
+
sheet.put("skip_header_lines", "2");
|
50
|
+
Map<String, Object> columns = new HashMap<>();
|
51
|
+
columns.put("text", newMap("column_number", "B"));
|
52
|
+
columns.put("number", newMap("value", "constant.-1"));
|
53
|
+
sheet.put("columns", columns);
|
54
|
+
sheetOptions.put("style", sheet);
|
55
|
+
}
|
56
|
+
parser.set("sheet_options", sheetOptions);
|
57
|
+
|
58
|
+
URL inFile = getClass().getResource("test1.xls");
|
59
|
+
List<OutputRecord> result = tester.runParser(inFile, parser);
|
60
|
+
|
61
|
+
assertThat(result.size(), is(8));
|
62
|
+
check1(result, 0, "abc", 123L);
|
63
|
+
check1(result, 1, "true", 1L);
|
64
|
+
check1(result, 2, null, null);
|
65
|
+
check1(result, 3, "boolean", 0L);
|
66
|
+
check1(result, 4, "test2-b1", 0L);
|
67
|
+
check1(result, 5, "left", -1L);
|
68
|
+
check1(result, 6, "right", -1L);
|
69
|
+
check1(result, 7, "bottom", -1L);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
private Map<String, Object> newMap(String key, Object value) {
|
74
|
+
Map<String, Object> map = new HashMap<>();
|
75
|
+
map.put(key, value);
|
76
|
+
return map;
|
77
|
+
}
|
78
|
+
|
79
|
+
private void check1(List<OutputRecord> result, int index, String text, Long number) {
|
80
|
+
OutputRecord record = result.get(index);
|
81
|
+
// System.out.println(record);
|
82
|
+
assertThat(record.getAsString("text"), is(text));
|
83
|
+
assertThat(record.getAsLong("number"), is(number));
|
84
|
+
}
|
85
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-poi_excel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hishidama
|
@@ -88,9 +88,10 @@ files:
|
|
88
88
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java
|
89
89
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java
|
90
90
|
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java
|
91
|
+
- src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java
|
91
92
|
- src/test/resources/org/embulk/parser/poi_excel/test1.xls
|
92
93
|
- classpath/commons-codec-1.9.jar
|
93
|
-
- classpath/embulk-parser-poi_excel-0.1.
|
94
|
+
- classpath/embulk-parser-poi_excel-0.1.3.jar
|
94
95
|
- classpath/embulk-standards-0.7.5.jar
|
95
96
|
- classpath/poi-3.13.jar
|
96
97
|
- classpath/poi-ooxml-3.13.jar
|