embulk-parser-poi_excel 0.1.4 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -5
- data/build.gradle +17 -11
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -6
- data/gradlew +43 -35
- data/gradlew.bat +4 -10
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +4 -0
- data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +80 -3
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +110 -6
- data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +74 -37
- data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +50 -0
- data/src/main/java/org/embulk/parser/poi_excel/bean/util/SearchMergedCell.java +71 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +0 -6
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +11 -11
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +52 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +79 -40
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java +1 -1
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +64 -4
- data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +14 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +3 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +30 -2
- data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +5 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionFinder.java +9 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionList.java +20 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java +55 -0
- data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionNothing.java +12 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +27 -79
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java +69 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +14 -14
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +79 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java +1 -1
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java +90 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java +94 -0
- data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +35 -1
- metadata +30 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ac332670f656bcf9bbf5fe659b53a9a5d61ed34
|
4
|
+
data.tar.gz: b08187d453742b61242f3c0a414b859e7b3df605
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d10568439d7728ce805c8a4945dce146b3c5b3983b1be2433b13986a07be0ac0cb52763daebe5c2cc0e0154403e3ec3a3fbbc57e1ff6112afb01a23157ad11da
|
7
|
+
data.tar.gz: e3b6f6efa059db9b8994ae78700cc8c50f67d80059b76eb41dafbc1b5ce26f1b133c1def929a7e4aa1d1f0fdaaff0d174683d2dba86956948276d2038275dd2e
|
data/README.md
CHANGED
@@ -28,12 +28,12 @@ in:
|
|
28
28
|
```
|
29
29
|
|
30
30
|
If **value** is omitted, `cell_value` is used.
|
31
|
-
if omit **column_number** when **
|
31
|
+
If **column_number** is omitted when **value** is `cell_value`, the next column is used.
|
32
32
|
|
33
33
|
|
34
34
|
## Configuration
|
35
35
|
|
36
|
-
* **sheets**: sheet name. (list of string, required)
|
36
|
+
* **sheets**: sheet names. Wildcards `*` and `?` can be used. (list of string, required)
|
37
37
|
* **skip_header_lines**: skip rows. (integer, default: `0`)
|
38
38
|
* **columns**: column definition. see below. (hash, required)
|
39
39
|
* **sheet_options**: sheet option. see below. (hash, default: null)
|
@@ -44,9 +44,15 @@ if omit **column_number** when **valus** is `cell_value`, specified next column.
|
|
44
44
|
* **type**: Embulk column type. (string, required)
|
45
45
|
* **value**: value type. see below. (string, default: `cell_value`)
|
46
46
|
* **column_number**: Excel column number. see below. (string, default: next column)
|
47
|
+
* **cell_address**: Excel cell address such as `A1`, `Sheet1!B3`. only one of `column_number`, `cell_address` can be specified. (string, not required)
|
48
|
+
* **numeric_format**: format used to convert numeric (double) values to string, such as `%4.2f`. (string, default: Java's `Double.toString()`)
|
47
49
|
* **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string)
|
48
50
|
* **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`)
|
51
|
+
* **formula_handling**: processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`)
|
52
|
+
* **on_evaluate_error**: processing method of evaluate formula error. see below. (string, default: `exception`)
|
53
|
+
* **formula_replace**: replace formula before evaluate. see below.
|
49
54
|
* **on_convert_error**: processing method of convert error. see below. (string, default: `exception`)
|
55
|
+
* **search_merged_cell**: search merged cell when cell is BLANK. (`none`, `linear_search`, `tree_search` or `hash_search`, default: `hash_search`)
|
50
56
|
|
51
57
|
### value
|
52
58
|
|
@@ -55,6 +61,8 @@ if omit **column_number** when **valus** is `cell_value`, specified next column.
|
|
55
61
|
* `cell_style`: all cell style attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
56
62
|
* `cell_font`: all cell font attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
57
63
|
* `cell_comment`: all cell comment attributes. returned json string. see **attribute_name**. (**type** required `string`)
|
64
|
+
* `cell_type`: cell type. returned Cell.getCellType() of POI.
|
65
|
+
* `cell_cached_type`: cell cached formula result type. returned Cell.getCachedFormulaResultType() of POI when CellType==FORMULA, otherwise same as `cell_type` (returned Cell.getCellType()).
|
58
66
|
* `sheet_name`: sheet name.
|
59
67
|
* `row_number`: row number(1 origin).
|
60
68
|
* `column_number`: column number(1 origin).
|
@@ -120,11 +128,50 @@ Processing method of Cell error (`#DIV/0!`, `#REF!`, etc).
|
|
120
128
|
```
|
121
129
|
|
122
130
|
* `constant`: set null. (default)
|
123
|
-
* `constant.`*value*: set value.
|
131
|
+
* `constant.`*value*: set specified value.
|
124
132
|
* `error_code`: set error code.
|
125
133
|
* `exception`: throw exception.
|
126
134
|
|
127
135
|
|
136
|
+
### formula_handling
|
137
|
+
|
138
|
+
Processing method of formula.
|
139
|
+
|
140
|
+
```yaml
|
141
|
+
columns:
|
142
|
+
- {name: foo, type: string, column_number: A, value: cell_value, formula_handling: cashed_value}
|
143
|
+
```
|
144
|
+
|
145
|
+
* `evaluate`: evaluate formula. (default)
|
146
|
+
* `cashed_value`: use the cached value stored in the cell.
|
147
|
+
|
148
|
+
|
149
|
+
### on_evaluate_error
|
150
|
+
|
151
|
+
Processing method of evaluate formula error.
|
152
|
+
|
153
|
+
```yaml
|
154
|
+
columns:
|
155
|
+
- {name: foo, type: string, column_number: A, value: cell_value, on_evaluate_error: constant}
|
156
|
+
```
|
157
|
+
|
158
|
+
* `constant`: set null.
|
159
|
+
* `constant.`*value*: set value.
|
160
|
+
* `exception`: throw exception. (default)
|
161
|
+
|
162
|
+
|
163
|
+
### formula_replace
|
164
|
+
|
165
|
+
Replace formula before evaluate.
|
166
|
+
|
167
|
+
```yaml
|
168
|
+
columns:
|
169
|
+
- {name: foo, type: string, column_number: A, value: cell_value, formula_replace: [{regex: aaa, to: "A${row}"}, {regex: bbb, to: "B${row}"}]}
|
170
|
+
```
|
171
|
+
|
172
|
+
`${row}` is replaced with the current row number.
|
173
|
+
|
174
|
+
|
128
175
|
### on_convert_error
|
129
176
|
|
130
177
|
Processing method of conversion errors, e.g. Excel boolean to Embulk timestamp.
|
@@ -141,7 +188,7 @@ Processing method of convert error. ex) Excel boolean to Embulk timestamp
|
|
141
188
|
|
142
189
|
### sheet_options
|
143
190
|
|
144
|
-
Options of
|
191
|
+
Options of individual sheet.
|
145
192
|
|
146
193
|
```yaml
|
147
194
|
parser:
|
@@ -165,7 +212,7 @@ Options of indivisual sheet.
|
|
165
212
|
```
|
166
213
|
|
167
214
|
**sheet_options** is map of sheet name.
|
168
|
-
Map values are **skip_header_lines**, **
|
215
|
+
Map values are **skip_header_lines**, **columns**.
|
169
216
|
|
170
217
|
**columns** is map of column name.
|
171
218
|
Map values are the same as **columns** in **parser** (excluding `name`, `type`).
|
data/build.gradle
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
|
-
id "com.github.jruby-gradle.base" version "
|
3
|
+
id "com.github.jruby-gradle.base" version "1.5.0"
|
4
4
|
id "java"
|
5
5
|
id "eclipse"
|
6
6
|
}
|
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.
|
16
|
+
version = "0.1.11"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
@@ -23,8 +23,10 @@ dependencies {
|
|
23
23
|
provided "org.embulk:embulk-core:0.7.5"
|
24
24
|
compile "org.embulk:embulk-standards:0.7.5"
|
25
25
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
26
|
-
compile group: 'org.apache.poi', name : 'poi', version: '3.
|
27
|
-
compile
|
26
|
+
compile group: 'org.apache.poi', name : 'poi', version: '3.17'
|
27
|
+
compile(group: 'org.apache.poi', name : 'poi-ooxml', version: '3.17') {
|
28
|
+
exclude group: 'stax', module: 'stax-api'
|
29
|
+
}
|
28
30
|
testCompile "junit:junit:4.+"
|
29
31
|
}
|
30
32
|
|
@@ -36,19 +38,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
36
38
|
clean { delete "classpath" }
|
37
39
|
|
38
40
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
39
|
-
jrubyArgs "-
|
40
|
-
script "
|
41
|
+
jrubyArgs "-S"
|
42
|
+
script "gem"
|
43
|
+
scriptArgs "build", "${project.name}.gemspec"
|
41
44
|
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
42
45
|
}
|
43
46
|
|
44
47
|
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
45
|
-
jrubyArgs "-
|
46
|
-
script "
|
48
|
+
jrubyArgs "-S"
|
49
|
+
script "gem"
|
50
|
+
scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
|
47
51
|
}
|
48
52
|
|
49
|
-
task "package"(dependsOn: ["gemspec", "classpath"])
|
50
|
-
|
51
|
-
|
53
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
54
|
+
doLast {
|
55
|
+
println "> Build succeeded."
|
56
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
57
|
+
}
|
52
58
|
}
|
53
59
|
|
54
60
|
task gemspec {
|
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
|
1
|
+
distributionBase=GRADLE_USER_HOME
|
2
|
+
distributionPath=wrapper/dists
|
3
|
+
zipStoreBase=GRADLE_USER_HOME
|
4
|
+
zipStorePath=wrapper/dists
|
5
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
|
data/gradlew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env sh
|
2
2
|
|
3
3
|
##############################################################################
|
4
4
|
##
|
@@ -6,20 +6,38 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
18
|
-
warn (
|
36
|
+
warn () {
|
19
37
|
echo "$*"
|
20
38
|
}
|
21
39
|
|
22
|
-
die (
|
40
|
+
die () {
|
23
41
|
echo
|
24
42
|
echo "$*"
|
25
43
|
echo
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,31 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
-
if $cygwin ; then
|
47
|
-
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
-
fi
|
49
|
-
|
50
|
-
# Attempt to set APP_HOME
|
51
|
-
# Resolve links: $0 may be a link
|
52
|
-
PRG="$0"
|
53
|
-
# Need this for relative symlinks.
|
54
|
-
while [ -h "$PRG" ] ; do
|
55
|
-
ls=`ls -ld "$PRG"`
|
56
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
57
|
-
if expr "$link" : '/.*' > /dev/null; then
|
58
|
-
PRG="$link"
|
59
|
-
else
|
60
|
-
PRG=`dirname "$PRG"`"/$link"
|
61
|
-
fi
|
62
|
-
done
|
63
|
-
SAVED="`pwd`"
|
64
|
-
cd "`dirname \"$PRG\"`/" >&-
|
65
|
-
APP_HOME="`pwd -P`"
|
66
|
-
cd "$SAVED" >&-
|
67
|
-
|
68
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
68
|
|
70
69
|
# Determine the Java command to use to start the JVM.
|
@@ -90,7 +89,7 @@ location of your Java installation."
|
|
90
89
|
fi
|
91
90
|
|
92
91
|
# Increase the maximum file descriptors if we can.
|
93
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
94
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
95
94
|
if [ $? -eq 0 ] ; then
|
96
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -114,6 +113,7 @@ fi
|
|
114
113
|
if $cygwin ; then
|
115
114
|
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
115
|
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
116
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
117
117
|
|
118
118
|
# We build the pattern for arguments to be converted via cygpath
|
119
119
|
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
@@ -154,11 +154,19 @@ if $cygwin ; then
|
|
154
154
|
esac
|
155
155
|
fi
|
156
156
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
157
|
+
# Escape application args
|
158
|
+
save () {
|
159
|
+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
160
|
+
echo " "
|
160
161
|
}
|
161
|
-
|
162
|
-
|
162
|
+
APP_ARGS=$(save "$@")
|
163
|
+
|
164
|
+
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
165
|
+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
166
|
+
|
167
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
168
|
+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
169
|
+
cd "$(dirname "$0")"
|
170
|
+
fi
|
163
171
|
|
164
|
-
exec "$JAVACMD" "
|
172
|
+
exec "$JAVACMD" "$@"
|
data/gradlew.bat
CHANGED
@@ -8,14 +8,14 @@
|
|
8
8
|
@rem Set local scope for the variables with windows NT shell
|
9
9
|
if "%OS%"=="Windows_NT" setlocal
|
10
10
|
|
11
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
-
set DEFAULT_JVM_OPTS=
|
13
|
-
|
14
11
|
set DIRNAME=%~dp0
|
15
12
|
if "%DIRNAME%" == "" set DIRNAME=.
|
16
13
|
set APP_BASE_NAME=%~n0
|
17
14
|
set APP_HOME=%DIRNAME%
|
18
15
|
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
19
|
@rem Find java.exe
|
20
20
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
21
|
|
@@ -46,10 +46,9 @@ echo location of your Java installation.
|
|
46
46
|
goto fail
|
47
47
|
|
48
48
|
:init
|
49
|
-
@rem Get command-line arguments, handling
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
50
|
|
51
51
|
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
52
|
|
54
53
|
:win9xME_args
|
55
54
|
@rem Slurp the command line arguments.
|
@@ -60,11 +59,6 @@ set _SKIP=2
|
|
60
59
|
if "x%~1" == "x" goto execute
|
61
60
|
|
62
61
|
set CMD_LINE_ARGS=%*
|
63
|
-
goto execute
|
64
|
-
|
65
|
-
:4NT_args
|
66
|
-
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
-
set CMD_LINE_ARGS=%$
|
68
62
|
|
69
63
|
:execute
|
70
64
|
@rem Setup the command line
|
@@ -11,6 +11,10 @@ public enum PoiExcelColumnValueType {
|
|
11
11
|
CELL_FONT(true, false),
|
12
12
|
/** cell comment */
|
13
13
|
CELL_COMMENT(true, false),
|
14
|
+
/** cell type */
|
15
|
+
CELL_TYPE(true, false),
|
16
|
+
/** cell CachedFormulaResultType */
|
17
|
+
CELL_CACHED_TYPE(true, false),
|
14
18
|
/** sheet name */
|
15
19
|
SHEET_NAME(false, false),
|
16
20
|
/** row number (1 origin) */
|
@@ -2,8 +2,11 @@ package org.embulk.parser.poi_excel;
|
|
2
2
|
|
3
3
|
import java.io.IOException;
|
4
4
|
import java.util.ArrayList;
|
5
|
+
import java.util.LinkedHashSet;
|
5
6
|
import java.util.List;
|
6
7
|
import java.util.Map;
|
8
|
+
import java.util.Set;
|
9
|
+
import java.util.regex.Pattern;
|
7
10
|
|
8
11
|
import org.apache.poi.EncryptedDocumentException;
|
9
12
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
@@ -93,6 +96,11 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
93
96
|
@ConfigDefault("null")
|
94
97
|
public Optional<String> getColumnNumber();
|
95
98
|
|
99
|
+
// A1,B2,... or Sheet1!A1
|
100
|
+
@Config("cell_address")
|
101
|
+
@ConfigDefault("null")
|
102
|
+
public Optional<String> getCellAddress();
|
103
|
+
|
96
104
|
// use when value_type=cell_style, cell_font, ...
|
97
105
|
@Config("attribute_name")
|
98
106
|
@ConfigDefault("null")
|
@@ -100,11 +108,19 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
100
108
|
}
|
101
109
|
|
102
110
|
public interface ColumnCommonOptionTask extends Task {
|
111
|
+
// format of numeric(double) to string
|
112
|
+
@Config("numeric_format")
|
113
|
+
@ConfigDefault("null")
|
114
|
+
public Optional<String> getNumericFormat();
|
103
115
|
|
104
116
|
// search merged cell if cellType=BLANK
|
105
117
|
@Config("search_merged_cell")
|
106
118
|
@ConfigDefault("null")
|
107
|
-
public Optional<
|
119
|
+
public Optional<String> getSearchMergedCell();
|
120
|
+
|
121
|
+
@Config("formula_handling")
|
122
|
+
@ConfigDefault("null")
|
123
|
+
public Optional<String> getFormulaHandling();
|
108
124
|
|
109
125
|
@Config("formula_replace")
|
110
126
|
@ConfigDefault("null")
|
@@ -166,9 +182,60 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
166
182
|
throw new RuntimeException(e);
|
167
183
|
}
|
168
184
|
|
169
|
-
|
185
|
+
List<String> list = resolveSheetName(workbook, sheetNames);
|
186
|
+
if (log.isDebugEnabled()) {
|
187
|
+
log.debug("resolved sheet names={}", list);
|
188
|
+
}
|
189
|
+
run(task, schema, workbook, list, output);
|
190
|
+
}
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
private List<String> resolveSheetName(Workbook workbook, List<String> sheetNames) {
|
195
|
+
Set<String> set = new LinkedHashSet<>();
|
196
|
+
for (String s : sheetNames) {
|
197
|
+
if (s.contains("*") || s.contains("?")) {
|
198
|
+
int length = s.length();
|
199
|
+
StringBuilder sb = new StringBuilder(length * 2);
|
200
|
+
StringBuilder buf = new StringBuilder(32);
|
201
|
+
for (int i = 0; i < length;) {
|
202
|
+
int c = s.codePointAt(i);
|
203
|
+
switch (c) {
|
204
|
+
case '*':
|
205
|
+
if (buf.length() > 0) {
|
206
|
+
sb.append(Pattern.quote(buf.toString()));
|
207
|
+
buf.setLength(0);
|
208
|
+
}
|
209
|
+
sb.append(".*");
|
210
|
+
break;
|
211
|
+
case '?':
|
212
|
+
if (buf.length() > 0) {
|
213
|
+
sb.append(Pattern.quote(buf.toString()));
|
214
|
+
buf.setLength(0);
|
215
|
+
}
|
216
|
+
sb.append(".");
|
217
|
+
break;
|
218
|
+
default:
|
219
|
+
buf.appendCodePoint(c);
|
220
|
+
break;
|
221
|
+
}
|
222
|
+
i += Character.charCount(c);
|
223
|
+
}
|
224
|
+
if (buf.length() > 0) {
|
225
|
+
sb.append(Pattern.quote(buf.toString()));
|
226
|
+
}
|
227
|
+
String regex = sb.toString();
|
228
|
+
for (Sheet sheet : workbook) {
|
229
|
+
String name = sheet.getSheetName();
|
230
|
+
if (name.matches(regex)) {
|
231
|
+
set.add(name);
|
232
|
+
}
|
233
|
+
}
|
234
|
+
} else {
|
235
|
+
set.add(s);
|
170
236
|
}
|
171
237
|
}
|
238
|
+
return new ArrayList<>(set);
|
172
239
|
}
|
173
240
|
|
174
241
|
protected void run(PluginTask task, Schema schema, Workbook workbook, List<String> sheetNames, PageOutput output) {
|
@@ -193,18 +260,28 @@ public class PoiExcelParserPlugin implements ParserPlugin {
|
|
193
260
|
|
194
261
|
int count = 0;
|
195
262
|
for (Row row : sheet) {
|
196
|
-
|
263
|
+
int rowIndex = row.getRowNum();
|
264
|
+
if (rowIndex < skipHeaderLines) {
|
265
|
+
log.debug("row({}) skipped", rowIndex);
|
197
266
|
continue;
|
198
267
|
}
|
268
|
+
if (log.isDebugEnabled()) {
|
269
|
+
log.debug("row({}) start", rowIndex);
|
270
|
+
}
|
199
271
|
|
200
272
|
visitor.setRow(row);
|
201
273
|
schema.visitColumns(visitor);
|
202
274
|
pageBuilder.addRecord();
|
203
275
|
|
204
276
|
if (++count >= flushCount) {
|
277
|
+
log.trace("flush");
|
205
278
|
pageBuilder.flush();
|
206
279
|
count = 0;
|
207
280
|
}
|
281
|
+
|
282
|
+
if (log.isDebugEnabled()) {
|
283
|
+
log.debug("row({}) end", rowIndex);
|
284
|
+
}
|
208
285
|
}
|
209
286
|
pageBuilder.flush();
|
210
287
|
}
|