embulk-parser-poi_excel 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +21 -21
- data/README.md +247 -245
- data/build.gradle +92 -92
- data/classpath/{embulk-parser-poi_excel-0.1.12.jar → embulk-parser-poi_excel-0.1.13.jar} +0 -0
- data/gradlew +172 -172
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +2 -0
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +7 -6
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +3 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java +6 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +15 -3
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorValue.java +28 -1
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +9 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 181f7ed2aa447dea8c2214aa72a24fe85d2d5975
|
4
|
+
data.tar.gz: 1eb5a1c03276531b7cc7771abb7d870e2d67d0c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65ed0d0fa2e27d3ea00954a87062e49d91052e5478f33b6ec175d133e0f6e6f7e42be488d7d40fc3a259f833a6abe6ab72077fa72e007f6ba848defb9ec030a6
|
7
|
+
data.tar.gz: 1522b1223a8c7ad0577b022fae899820196cb2b703633363f92a3398bd33dcfcabd5e49525516b34beac29c2e2a97a3954ad23995295fa73316c915a7ff6b31c
|
data/LICENSE.txt
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
|
2
|
-
MIT License
|
3
|
-
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
a copy of this software and associated documentation files (the
|
6
|
-
"Software"), to deal in the Software without restriction, including
|
7
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be
|
13
|
-
included in all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
1
|
+
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,245 +1,247 @@
|
|
1
|
-
# Apache POI Excel parser plugin for Embulk
|
2
|
-
|
3
|
-
Parses Microsoft Excel files(xls, xlsx) read by other file input plugins.
|
4
|
-
This plugin uses Apache POI.
|
5
|
-
|
6
|
-
## Overview
|
7
|
-
|
8
|
-
* **Plugin type**: parser
|
9
|
-
* **Guess supported**: no
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
- {name:
|
24
|
-
- {name:
|
25
|
-
- {name:
|
26
|
-
- {name:
|
27
|
-
- {name:
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
if omit **
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
* **
|
38
|
-
* **
|
39
|
-
* **
|
40
|
-
* **
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
* **
|
46
|
-
* **
|
47
|
-
* **
|
48
|
-
* **
|
49
|
-
* **
|
50
|
-
* **
|
51
|
-
* **
|
52
|
-
* **
|
53
|
-
* **
|
54
|
-
* **
|
55
|
-
* **
|
56
|
-
* **
|
57
|
-
* **
|
58
|
-
* **
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
* `
|
64
|
-
* `
|
65
|
-
* `
|
66
|
-
* `
|
67
|
-
* `
|
68
|
-
* `
|
69
|
-
* `
|
70
|
-
* `
|
71
|
-
* `
|
72
|
-
* `
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
*
|
84
|
-
*
|
85
|
-
*
|
86
|
-
*
|
87
|
-
*
|
88
|
-
*
|
89
|
-
*
|
90
|
-
*
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
* `
|
144
|
-
* `
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
* `
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
* `
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
- {name:
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
```
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
1
|
+
# Apache POI Excel parser plugin for Embulk
|
2
|
+
|
3
|
+
Parses Microsoft Excel files(xls, xlsx) read by other file input plugins.
|
4
|
+
This plugin uses Apache POI.
|
5
|
+
|
6
|
+
## Overview
|
7
|
+
|
8
|
+
* **Plugin type**: parser
|
9
|
+
* **Guess supported**: no
|
10
|
+
* Embulk 0.9 or earlier (refer to https://github.com/hishidama/embulk-parser-excel-poi for 0.10 and later)
|
11
|
+
|
12
|
+
|
13
|
+
## Example
|
14
|
+
|
15
|
+
```yaml
|
16
|
+
in:
|
17
|
+
type: any file input plugin type
|
18
|
+
parser:
|
19
|
+
type: poi_excel
|
20
|
+
sheets: ["DQ10-orb"]
|
21
|
+
skip_header_lines: 1 # first row is header.
|
22
|
+
columns:
|
23
|
+
- {name: row, type: long, value: row_number}
|
24
|
+
- {name: get_date, type: timestamp, cell_column: A, value: cell_value}
|
25
|
+
- {name: orb_type, type: string}
|
26
|
+
- {name: orb_name, type: string}
|
27
|
+
- {name: orb_shape, type: long}
|
28
|
+
- {name: drop_monster_name, type: string}
|
29
|
+
```
|
30
|
+
|
31
|
+
If **value** is omitted, `cell_value` is used.
|
32
|
+
If **cell_column** is omitted when **value** is `cell_value`, the next column is used.
|
33
|
+
|
34
|
+
|
35
|
+
## Configuration
|
36
|
+
|
37
|
+
* **sheets**: sheet name. can use wildcards `*`, `?`. (list of string, required)
|
38
|
+
* **record_type**: record type. (`row`, `column` or `sheet`. default: `row`)
|
39
|
+
* **skip_header_lines**: skip rows when **record_type**=`row` (skip columns when **record_type**=`column`). ignored when **record_type**=`sheet`. (integer, default: `0`)
|
40
|
+
* **columns**: column definition. see below. (hash, required)
|
41
|
+
* **sheet_options**: sheet option. see below. (hash, default: null)
|
42
|
+
|
43
|
+
### columns
|
44
|
+
|
45
|
+
* **name**: Embulk column name. (string, required)
|
46
|
+
* **type**: Embulk column type. (string, required)
|
47
|
+
* **value**: value type. see below. (string, default: `cell_value`)
|
48
|
+
* **column_number**: same as **cell_column**.
|
49
|
+
* **cell_column**: Excel column number. see below. (string, default: next column when **record_type**=`row`)
|
50
|
+
* **cell_row**: Excel row number. see below. (integer, default: next row when **record_type**=`column`)
|
51
|
+
* **cell_address**: Excel cell address such as `A1`, `Sheet1!B3`. (string, not required)
|
52
|
+
* **numeric_format**: format of numeric(double) to string such as `%4.2f`. (default: Java's Double.toString())
|
53
|
+
* **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string)
|
54
|
+
* **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`)
|
55
|
+
* **formula_handling**: processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`)
|
56
|
+
* **on_evaluate_error**: processing method of evaluate formula error. see below. (string, default: `exception`)
|
57
|
+
* **formula_replace**: replace formula before evaluate. see below.
|
58
|
+
* **on_convert_error**: processing method of convert error. see below. (string, default: `exception`)
|
59
|
+
* **search_merged_cell**: search merged cell when cell is BLANK. (`none`, `linear_search`, `tree_search` or `hash_search`, default: `hash_search`)
|
60
|
+
|
61
|
+
### value
|
62
|
+
|
63
|
+
* `cell_value`: value in cell.
|
64
|
+
* `cell_formula`: formula in cell. (if the cell is not a formula, same as `cell_value`.)
|
65
|
+
* `cell_style`: all cell style attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
66
|
+
* `cell_font`: all cell font attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
67
|
+
* `cell_comment`: all cell comment attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
68
|
+
* `cell_type`: cell type. returned Cell.getCellType() of POI.
|
69
|
+
* `cell_cached_type`: cell cached formula result type. returned Cell.getCachedFormulaResultType() of POI when CellType==FORMULA, otherwise same as `cell_type` (returned Cell.getCellType()).
|
70
|
+
* `file_name`: excel file name.
|
71
|
+
* `sheet_name`: sheet name.
|
72
|
+
* `row_number`: row number(1 origin).
|
73
|
+
* `column_number`: column number(1 origin).
|
74
|
+
* `constant`: constant value.
|
75
|
+
|
76
|
+
* `constant.`*value*: specified value.
|
77
|
+
* `constant`: null.
|
78
|
+
|
79
|
+
### cell_column
|
80
|
+
|
81
|
+
Basically used for **record_type**=`row`.
|
82
|
+
|
83
|
+
* `A`,`B`,`C`,...: column number of "A1 format".
|
84
|
+
* *number*: column number (1 origin).
|
85
|
+
* `+`: next column.
|
86
|
+
* `+`*name*: next column of name.
|
87
|
+
* `+`*number*: the column *number* columns after the previous one.
|
88
|
+
* `-`: previous column.
|
89
|
+
* `-`*name*: previous column of name.
|
90
|
+
* `-`*number*: the column *number* columns before the previous one.
|
91
|
+
* `=`: same column.
|
92
|
+
* `=`*name*: same column of name.
|
93
|
+
|
94
|
+
### cell_row
|
95
|
+
|
96
|
+
Basically used for **record_type**=`column`.
|
97
|
+
|
98
|
+
* *number*: row number (1 origin).
|
99
|
+
|
100
|
+
### attribute_name
|
101
|
+
|
102
|
+
When **value** is `cell_style`, `cell_font`, or `cell_comment`, all attributes are retrieved and converted to a JSON string by default.
|
103
|
+
(Because a JSON string is returned, **type** must be `string`.)
|
104
|
+
|
105
|
+
```yaml
|
106
|
+
columns:
|
107
|
+
- {name: foo, type: string, cell_column: A, value: cell_style}
|
108
|
+
```
|
109
|
+
|
110
|
+
|
111
|
+
By specifying **attribute_name**, only the specified attributes are retrieved and converted to a JSON string.
|
112
|
+
|
113
|
+
* **attribute_name**: attribute names. (list of string)
|
114
|
+
|
115
|
+
```yaml
|
116
|
+
columns:
|
117
|
+
- {name: foo, type: string, cell_column: A, value: cell_style, attribute_name: [border_top, border_bottom, border_left, border_right]}
|
118
|
+
```
|
119
|
+
|
120
|
+
|
121
|
+
You can also retrieve a single attribute by appending a period and the attribute name immediately after `cell_style` or `cell_font`.
|
122
|
+
In this case the result is not a JSON string, so you must specify a **type** that matches the attribute's type.
|
123
|
+
|
124
|
+
```yaml
|
125
|
+
columns:
|
126
|
+
- {name: foo, type: long, value: cell_style.border}
|
127
|
+
- {name: bar, type: long, value: cell_font.color}
|
128
|
+
```
|
129
|
+
|
130
|
+
Note that for `cell_style` and `cell_font`, omitting **cell_column** targets the same column as the previous column definition.
|
131
|
+
(For `cell_value`, omitting **cell_column** moves to the next column.)
|
132
|
+
|
133
|
+
|
134
|
+
### on_cell_error
|
135
|
+
|
136
|
+
Processing method of Cell error (`#DIV/0!`, `#REF!`, etc).
|
137
|
+
|
138
|
+
```yaml
|
139
|
+
columns:
|
140
|
+
- {name: foo, type: string, cell_column: A, value: cell_value, on_cell_error: error_code}
|
141
|
+
```
|
142
|
+
|
143
|
+
* `constant`: set null. (default)
|
144
|
+
* `constant.`*value*: set specified value.
|
145
|
+
* `error_code`: set error code.
|
146
|
+
* `exception`: throw exception.
|
147
|
+
|
148
|
+
|
149
|
+
### formula_handling
|
150
|
+
|
151
|
+
Processing method of formula.
|
152
|
+
|
153
|
+
```yaml
|
154
|
+
columns:
|
155
|
+
- {name: foo, type: string, cell_column: A, value: cell_value, formula_handling: cashed_value}
|
156
|
+
```
|
157
|
+
|
158
|
+
* `evaluate`: evaluate formula. (default)
|
159
|
+
* `cashed_value`: cached value in cell.
|
160
|
+
|
161
|
+
|
162
|
+
### on_evaluate_error
|
163
|
+
|
164
|
+
Processing method of evaluate formula error.
|
165
|
+
|
166
|
+
```yaml
|
167
|
+
columns:
|
168
|
+
- {name: foo, type: string, cell_column: A, value: cell_value, on_evaluate_error: constant}
|
169
|
+
```
|
170
|
+
|
171
|
+
* `constant`: set null.
|
172
|
+
* `constant.`*value*: set specified value.
|
173
|
+
* `exception`: throw exception. (default)
|
174
|
+
|
175
|
+
|
176
|
+
### formula_replace
|
177
|
+
|
178
|
+
Replace formula before evaluate.
|
179
|
+
|
180
|
+
```yaml
|
181
|
+
columns:
|
182
|
+
- {name: foo, type: string, cell_column: A, value: cell_value, formula_replace: [{regex: aaa, to: "A${row}"}, {regex: bbb, to: "B${row}"}]}
|
183
|
+
```
|
184
|
+
|
185
|
+
`${row}` is replaced with the current row number.
|
186
|
+
`${column}` is replaced with the current column string.
|
187
|
+
|
188
|
+
|
189
|
+
### on_convert_error
|
190
|
+
|
191
|
+
Processing method of conversion errors, e.g. converting an Excel boolean to an Embulk timestamp.
|
192
|
+
|
193
|
+
```yaml
|
194
|
+
columns:
|
195
|
+
- {name: foo, type: timestamp, format: "%Y/%m/%d", cell_column: A, value: cell_value, on_convert_error: constant.9999/12/31}
|
196
|
+
```
|
197
|
+
|
198
|
+
* `constant`: set null.
|
199
|
+
* `constant.`*value*: set specified value.
|
200
|
+
* `exception`: throw exception. (default)
|
201
|
+
|
202
|
+
|
203
|
+
### sheet_options
|
204
|
+
|
205
|
+
Options of individual sheet.
|
206
|
+
|
207
|
+
```yaml
|
208
|
+
parser:
|
209
|
+
type: poi_excel
|
210
|
+
sheets: [Sheet1, Sheet2]
|
211
|
+
columns:
|
212
|
+
- {name: date, type: timestamp, cell_column: A}
|
213
|
+
- {name: foo, type: string}
|
214
|
+
- {name: bar, type: long}
|
215
|
+
sheet_options:
|
216
|
+
Sheet1:
|
217
|
+
skip_header_lines: 1
|
218
|
+
columns:
|
219
|
+
foo: {cell_column: B}
|
220
|
+
bar: {cell_column: C}
|
221
|
+
Sheet2:
|
222
|
+
skip_header_lines: 0
|
223
|
+
columns:
|
224
|
+
foo: {cell_column: D}
|
225
|
+
bar: {value: constant.0}
|
226
|
+
```
|
227
|
+
|
228
|
+
**sheet_options** is a map keyed by sheet name.
|
229
|
+
Map values are **skip_header_lines**, **columns**.
|
230
|
+
|
231
|
+
**columns** is a map keyed by column name.
|
232
|
+
Map values are the same as **columns** in **parser** (excluding `name`, `type`).
|
233
|
+
|
234
|
+
|
235
|
+
## Install
|
236
|
+
|
237
|
+
```
|
238
|
+
$ embulk gem install embulk-parser-poi_excel
|
239
|
+
```
|
240
|
+
|
241
|
+
|
242
|
+
## Build
|
243
|
+
|
244
|
+
```
|
245
|
+
$ ./gradlew test
|
246
|
+
$ ./gradlew package
|
247
|
+
```
|