embulk-parser-poi_excel 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d1be437da5c3e86a63ef4c925d3ba11218c5344
4
- data.tar.gz: eb1b92328a4ac0c9619165bf046289538223e2ed
3
+ metadata.gz: 4fc1f7535c2d5dd180ad60a729c0daccab15144c
4
+ data.tar.gz: 2c09c8ffa12e314ff1af31dd0c2bab4c02d00057
5
5
  SHA512:
6
- metadata.gz: de797f9fd1895bb79987ae4564d0a4288a7a4c4115b217194462d9f5912912b0c814214c6319a363923953feeeb62f70cc2e4261b2a27695b1a26e6b60324c5c
7
- data.tar.gz: 8db340bcd1e3a46f54bd62513620c3d067432db0b5c28f71d9235f2dd3ae1aa5d0c37753ae1f1488f868e1915129c814d61d306dd58e9d58a602dcadd2ef9795
6
+ metadata.gz: 0236a3b74bee6392af244631640fd8fe317fa42728a690eecfe4ef11b149fa5c5f9237a334af0d757008aabfff7040c9787fef363b9a84a67c799eff5148e328
7
+ data.tar.gz: df386175cd7cbc2be51d70682f21d62581f527c040dfeaeac86def0ddff0b62297d58fcd3920cbd5ae4be636e49ae7ecd3f08de15e28c2930d7b4313140af6bb
data/README.md CHANGED
@@ -13,6 +13,7 @@ This plugin uses Apache POI.
13
13
  * **sheets**: sheet name. (list of string, required)
14
14
  * **skip_header_lines**: skip rows. (integer, default: `0`)
15
15
  * **columns**: column definition. see below. (hash, required)
16
+ * **sheet_options**: sheet option. see below. (hash, default: null)
16
17
 
17
18
  ### columns
18
19
 
@@ -115,6 +116,38 @@ Processing method of convert error. ex) Excel boolean to Embulk timestamp
115
116
  * `exception`: throw exception. (default)
116
117
 
117
118
 
119
+ ### sheet_options
120
+
121
+ Options for individual sheets.
122
+
123
+ ```yaml
124
+ parser:
125
+ type: poi_excel
126
+ sheets: [Sheet1, Sheet2]
127
+ columns:
128
+ - {name: date, type: timestamp, column_number: A}
129
+ - {name: foo, type: string}
130
+ - {name: bar, type: long}
131
+ sheet_options:
132
+ Sheet1:
133
+ skip_header_lines: 1
134
+ columns:
135
+ foo: {column_number: B}
136
+ bar: {column_number: C}
137
+ Sheet2:
138
+ skip_header_lines: 0
139
+ columns:
140
+ foo: {column_number: D}
141
+ bar: {value: constant.0}
142
+ ```
143
+
144
+ *sheet_options* is a map keyed by sheet name.
145
+ Map values are *skip_header_lines* and *columns*.
146
+
147
+ *columns* is a map keyed by column name.
148
+ Map values are the same as the *columns* options in *parser*.
149
+
150
+
118
151
  ## Example
119
152
 
120
153
  ```yaml
@@ -148,5 +181,6 @@ $ embulk gem install embulk-parser-poi_excel
148
181
  ## Build
149
182
 
150
183
  ```
184
+ $ ./gradlew test
151
185
  $ ./gradlew package
152
186
  ```
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.2"
16
+ version = "0.1.3"
17
17
 
18
18
  dependencies {
19
19
  compile "org.embulk:embulk-core:0.7.5"
@@ -39,7 +39,7 @@ public class PoiExcelParserPlugin implements ParserPlugin {
39
39
 
40
40
  public static final String TYPE = "poi_excel";
41
41
 
42
- public interface PluginTask extends Task, TimestampParser.Task, SheetOptionTask {
42
+ public interface PluginTask extends Task, TimestampParser.Task, SheetCommonOptionTask {
43
43
  @Config("sheet")
44
44
  @ConfigDefault("null")
45
45
  public Optional<String> getSheet();
@@ -56,19 +56,26 @@ public class PoiExcelParserPlugin implements ParserPlugin {
56
56
  @ConfigDefault("{}")
57
57
  public Map<String, SheetOptionTask> getSheetOptions();
58
58
 
59
+ @Config("columns")
60
+ public SchemaConfig getColumns();
61
+
59
62
  @Config("flush_count")
60
63
  @ConfigDefault("100")
61
64
  public int getFlushCount();
62
65
  }
63
66
 
64
- public interface SheetOptionTask extends Task, ColumnCommonOptionTask {
67
+ public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask {
65
68
 
66
69
  @Config("skip_header_lines")
67
70
  @ConfigDefault("null")
68
71
  public Optional<Integer> getSkipHeaderLines();
72
+ }
73
+
74
+ public interface SheetOptionTask extends Task, SheetCommonOptionTask {
69
75
 
70
76
  @Config("columns")
71
- public SchemaConfig getColumns();
77
+ @ConfigDefault("null")
78
+ public Optional<Map<String, ColumnOptionTask>> getColumns();
72
79
  }
73
80
 
74
81
  public interface ColumnOptionTask extends Task, ColumnCommonOptionTask {
@@ -9,6 +9,7 @@ import java.util.Map.Entry;
9
9
  import org.apache.poi.ss.usermodel.Sheet;
10
10
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask;
11
11
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask;
12
+ import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask;
12
13
  import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask;
13
14
  import org.embulk.spi.Column;
14
15
  import org.embulk.spi.ColumnConfig;
@@ -20,7 +21,7 @@ public class PoiExcelSheetBean {
20
21
 
21
22
  protected final Sheet sheet;
22
23
 
23
- private final List<SheetOptionTask> sheetTaskList = new ArrayList<>(2);
24
+ private final List<SheetCommonOptionTask> sheetTaskList = new ArrayList<>(2);
24
25
 
25
26
  private final List<PoiExcelColumnBean> columnBeanList = new ArrayList<>();
26
27
 
@@ -55,13 +56,14 @@ public class PoiExcelSheetBean {
55
56
  List<ColumnConfig> list = task.getColumns().getColumns();
56
57
 
57
58
  Map<String, ColumnOptionTask> map = new HashMap<>();
58
- List<SheetOptionTask> slist = getSheetOption();
59
- if (slist.size() >= 2) {
60
- SheetOptionTask s = slist.get(0);
61
- for (ColumnConfig c : s.getColumns().getColumns()) {
62
- String name = c.getName();
63
- ColumnOptionTask t = c.getOption().loadConfig(ColumnOptionTask.class);
64
- map.put(name, t);
59
+ List<SheetCommonOptionTask> slist = getSheetOption();
60
+ for (int i = slist.size() - 1; i >= 0; i--) {
61
+ SheetCommonOptionTask s = slist.get(i);
62
+ if (s instanceof SheetOptionTask) {
63
+ Optional<Map<String, ColumnOptionTask>> option = ((SheetOptionTask) s).getColumns();
64
+ if (option.isPresent()) {
65
+ map.putAll(option.get());
66
+ }
65
67
  }
66
68
  }
67
69
 
@@ -76,13 +78,13 @@ public class PoiExcelSheetBean {
76
78
  new PoiExcelColumnIndex().initializeColumnIndex(task, columnBeanList);
77
79
  }
78
80
 
79
- public final List<SheetOptionTask> getSheetOption() {
81
+ public final List<SheetCommonOptionTask> getSheetOption() {
80
82
  return sheetTaskList;
81
83
  }
82
84
 
83
85
  public int getSkipHeaderLines() {
84
- List<SheetOptionTask> list = getSheetOption();
85
- for (SheetOptionTask sheetTask : list) {
86
+ List<SheetCommonOptionTask> list = getSheetOption();
87
+ for (SheetCommonOptionTask sheetTask : list) {
86
88
  Optional<Integer> value = sheetTask.getSkipHeaderLines();
87
89
  if (value.isPresent()) {
88
90
  return value.get();
@@ -0,0 +1,85 @@
1
+ package org.embulk.parser.poi_excel;
2
+
3
+ import static org.hamcrest.CoreMatchers.is;
4
+ import static org.junit.Assert.assertThat;
5
+
6
+ import java.net.URL;
7
+ import java.text.ParseException;
8
+ import java.util.Arrays;
9
+ import java.util.HashMap;
10
+ import java.util.List;
11
+ import java.util.Map;
12
+
13
+ import org.embulk.parser.EmbulkPluginTester;
14
+ import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
15
+ import org.embulk.parser.EmbulkTestParserConfig;
16
+ import org.junit.Test;
17
+
18
+ public class TestPoiExcelParserPlugin_sheets {
19
+
20
+ @Test
21
+ public void testSheets() throws ParseException {
22
+ try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
23
+ tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);
24
+
25
+ EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
26
+ parser.set("sheets", Arrays.asList("test1", "formula_replace", "style"));
27
+ parser.addColumn("text", "string");
28
+ parser.addColumn("number", "long");
29
+
30
+ Map<String, Object> sheetOptions = new HashMap<>();
31
+ {
32
+ Map<String, Object> sheet = new HashMap<>();
33
+ sheet.put("skip_header_lines", "5");
34
+ Map<String, Object> columns = new HashMap<>();
35
+ columns.put("text", newMap("column_number", "D"));
36
+ columns.put("number", newMap("column_number", "B"));
37
+ sheet.put("columns", columns);
38
+ sheetOptions.put("test1", sheet);
39
+ }
40
+ {
41
+ Map<String, Object> sheet = new HashMap<>();
42
+ Map<String, Object> columns = new HashMap<>();
43
+ columns.put("number", newMap("value", "constant.0"));
44
+ sheet.put("columns", columns);
45
+ sheetOptions.put("formula_replace", sheet);
46
+ }
47
+ {
48
+ Map<String, Object> sheet = new HashMap<>();
49
+ sheet.put("skip_header_lines", "2");
50
+ Map<String, Object> columns = new HashMap<>();
51
+ columns.put("text", newMap("column_number", "B"));
52
+ columns.put("number", newMap("value", "constant.-1"));
53
+ sheet.put("columns", columns);
54
+ sheetOptions.put("style", sheet);
55
+ }
56
+ parser.set("sheet_options", sheetOptions);
57
+
58
+ URL inFile = getClass().getResource("test1.xls");
59
+ List<OutputRecord> result = tester.runParser(inFile, parser);
60
+
61
+ assertThat(result.size(), is(8));
62
+ check1(result, 0, "abc", 123L);
63
+ check1(result, 1, "true", 1L);
64
+ check1(result, 2, null, null);
65
+ check1(result, 3, "boolean", 0L);
66
+ check1(result, 4, "test2-b1", 0L);
67
+ check1(result, 5, "left", -1L);
68
+ check1(result, 6, "right", -1L);
69
+ check1(result, 7, "bottom", -1L);
70
+ }
71
+ }
72
+
73
+ private Map<String, Object> newMap(String key, Object value) {
74
+ Map<String, Object> map = new HashMap<>();
75
+ map.put(key, value);
76
+ return map;
77
+ }
78
+
79
+ private void check1(List<OutputRecord> result, int index, String text, Long number) {
80
+ OutputRecord record = result.get(index);
81
+ // System.out.println(record);
82
+ assertThat(record.getAsString("text"), is(text));
83
+ assertThat(record.getAsLong("number"), is(number));
84
+ }
85
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-poi_excel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hishidama
@@ -88,9 +88,10 @@ files:
88
88
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java
89
89
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java
90
90
  - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java
91
+ - src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java
91
92
  - src/test/resources/org/embulk/parser/poi_excel/test1.xls
92
93
  - classpath/commons-codec-1.9.jar
93
- - classpath/embulk-parser-poi_excel-0.1.2.jar
94
+ - classpath/embulk-parser-poi_excel-0.1.3.jar
94
95
  - classpath/embulk-standards-0.7.5.jar
95
96
  - classpath/poi-3.13.jar
96
97
  - classpath/poi-ooxml-3.13.jar