embulk-parser-poi_excel 0.1.4 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +52 -5
  3. data/build.gradle +17 -11
  4. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  5. data/gradle/wrapper/gradle-wrapper.properties +5 -6
  6. data/gradlew +43 -35
  7. data/gradlew.bat +4 -10
  8. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java +4 -0
  9. data/src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java +80 -3
  10. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnBean.java +110 -6
  11. data/src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelColumnIndex.java +74 -37
  12. data/src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java +50 -0
  13. data/src/main/java/org/embulk/parser/poi_excel/bean/util/SearchMergedCell.java +71 -0
  14. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java +0 -6
  15. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java +11 -11
  16. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java +52 -0
  17. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java +79 -40
  18. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java +1 -1
  19. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java +64 -4
  20. data/src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java +14 -0
  21. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java +5 -0
  22. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java +3 -0
  23. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java +5 -0
  24. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java +5 -0
  25. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java +30 -2
  26. data/src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java +5 -0
  27. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionFinder.java +9 -0
  28. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionList.java +20 -0
  29. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java +55 -0
  30. data/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionNothing.java +12 -0
  31. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin.java +27 -79
  32. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java +69 -0
  33. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java +1 -1
  34. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java +1 -1
  35. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java +1 -1
  36. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java +14 -14
  37. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java +79 -0
  38. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java +1 -1
  39. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java +1 -1
  40. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java +1 -1
  41. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java +90 -0
  42. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java +94 -0
  43. data/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java +35 -1
  44. metadata +30 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be106894c6b2167dd2f275cf8696003cc82195c3
4
- data.tar.gz: c6f3718249f56278a0a29dd50521216765838c51
3
+ metadata.gz: 3ac332670f656bcf9bbf5fe659b53a9a5d61ed34
4
+ data.tar.gz: b08187d453742b61242f3c0a414b859e7b3df605
5
5
  SHA512:
6
- metadata.gz: 6c88d6b4604a07b40681a44290bdf75c7f8c10e6e857da5ed254fe7828fc796e54f84e0f37de1e5c9f0089c934dd91283783b95849180b620cf1180a70d8a419
7
- data.tar.gz: 772b8d21d4e1587acd97886a79b284a1db42d8a12ac30660a0d2eba9db61292839ceeef39f65d8756e0ee4aea58c7a3c9e3785fcda31c51f01708b116497c6d7
6
+ metadata.gz: d10568439d7728ce805c8a4945dce146b3c5b3983b1be2433b13986a07be0ac0cb52763daebe5c2cc0e0154403e3ec3a3fbbc57e1ff6112afb01a23157ad11da
7
+ data.tar.gz: e3b6f6efa059db9b8994ae78700cc8c50f67d80059b76eb41dafbc1b5ce26f1b133c1def929a7e4aa1d1f0fdaaff0d174683d2dba86956948276d2038275dd2e
data/README.md CHANGED
@@ -28,12 +28,12 @@ in:
28
28
  ```
29
29
 
30
30
  if **value** is omitted, `cell_value` is used.
31
- if omit **column_number** when **valus** is `cell_value`, specified next column.
31
+ if **column_number** is omitted when **value** is `cell_value`, the next column is used.
32
32
 
33
33
 
34
34
  ## Configuration
35
35
 
36
- * **sheets**: sheet name. (list of string, required)
36
+ * **sheets**: sheet name. can use wildcards `*`, `?`. (list of string, required)
37
37
  * **skip_header_lines**: skip rows. (integer, default: `0`)
38
38
  * **columns**: column definition. see below. (hash, required)
39
39
  * **sheet_options**: sheet option. see below. (hash, default: null)
@@ -44,9 +44,15 @@ if omit **column_number** when **valus** is `cell_value`, specified next column.
44
44
  * **type**: Embulk column type. (string, required)
45
45
  * **value**: value type. see below. (string, default: `cell_value`)
46
46
  * **column_number**: Excel column number. see below. (string, default: next column)
47
+ * **cell_address**: Excel cell address such as `A1`, `Sheet1!B3`. only one of `column_number`, `cell_address` can be specified. (string, not required)
48
+ * **numeric_format**: format of numeric(double) to string such as `%4.2f`. (default: Java's Double.toString())
47
49
  * **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string)
48
50
  * **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`)
51
+ * **formula_handling**: processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`)
52
+ * **on_evaluate_error**: processing method of evaluate formula error. see below. (string, default: `exception`)
53
+ * **formula_replace**: replace formula before evaluate. see below.
49
54
  * **on_convert_error**: processing method of convert error. see below. (string, default: `exception`)
55
+ * **search_merged_cell**: search merged cell when cell is BLANK. (`none`, `linear_search`, `tree_search` or `hash_search`, default: `hash_search`)
50
56
 
51
57
  ### value
52
58
 
@@ -55,6 +61,8 @@ if omit **column_number** when **valus** is `cell_value`, specified next column.
55
61
  * `cell_style`: all cell style attributes. returned json string. see **attribute_name**. (**type** required `string`)
56
62
  * `cell_font`: all cell font attributes. returned json string. see **attribute_name**. (**type** required `string`)
57
63
  * `cell_comment`: all cell comment attributes. returned json string. see **attribute_name**. (**type** required `string`)
64
+ * `cell_type`: cell type. returned Cell.getCellType() of POI.
65
+ * `cell_cached_type`: cell cached formula result type. returned Cell.getCachedFormulaResultType() of POI when CellType==FORMULA, otherwise same as `cell_type` (returned Cell.getCellType()).
58
66
  * `sheet_name`: sheet name.
59
67
  * `row_number`: row number(1 origin).
60
68
  * `column_number`: column number(1 origin).
@@ -120,11 +128,50 @@ Processing method of Cell error (`#DIV/0!`, `#REF!`, etc).
120
128
  ```
121
129
 
122
130
  * `constant`: set null. (default)
123
- * `constant.`*value*: set value.
131
+ * `constant.`*value*: set specified value.
124
132
  * `error_code`: set error code.
125
133
  * `exception`: throw exception.
126
134
 
127
135
 
136
+ ### formula_handling
137
+
138
+ Processing method of formula.
139
+
140
+ ```yaml
141
+ columns:
142
+ - {name: foo, type: string, column_number: A, value: cell_value, formula_handling: cashed_value}
143
+ ```
144
+
145
+ * `evaluate`: evaluate formula. (default)
146
+ * `cashed_value`: cached value in cell.
147
+
148
+
149
+ ### on_evaluate_error
150
+
151
+ Processing method of evaluate formula error.
152
+
153
+ ```yaml
154
+ columns:
155
+ - {name: foo, type: string, column_number: A, value: cell_value, on_evaluate_error: constant}
156
+ ```
157
+
158
+ * `constant`: set null.
159
+ * `constant.`*value*: set value.
160
+ * `exception`: throw exception. (default)
161
+
162
+
163
+ ### formula_replace
164
+
165
+ Replace formula before evaluate.
166
+
167
+ ```yaml
168
+ columns:
169
+ - {name: foo, type: string, column_number: A, value: cell_value, formula_replace: [{regex: aaa, to: "A${row}"}, {regex: bbb, to: "B${row}"}]}
170
+ ```
171
+
172
+ `${row}` is replaced with the current row number.
173
+
174
+
128
175
  ### on_convert_error
129
176
 
130
177
  Processing method of convert error. ex) Excel boolean to Embulk timestamp
@@ -141,7 +188,7 @@ Processing method of convert error. ex) Excel boolean to Embulk timestamp
141
188
 
142
189
  ### sheet_options
143
190
 
144
- Options of indivisual sheet.
191
+ Options of individual sheet.
145
192
 
146
193
  ```yaml
147
194
  parser:
@@ -165,7 +212,7 @@ Options of indivisual sheet.
165
212
  ```
166
213
 
167
214
  **sheet_options** is map of sheet name.
168
- Map values are **skip_header_lines**, **colums**.
215
+ Map values are **skip_header_lines**, **columns**.
169
216
 
170
217
  **columns** is map of column name.
171
218
  Map values are same **columns** in **parser** (excluding `name`, `type`).
@@ -1,6 +1,6 @@
1
1
  plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
- id "com.github.jruby-gradle.base" version "0.1.5"
3
+ id "com.github.jruby-gradle.base" version "1.5.0"
4
4
  id "java"
5
5
  id "eclipse"
6
6
  }
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.4"
16
+ version = "0.1.11"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -23,8 +23,10 @@ dependencies {
23
23
  provided "org.embulk:embulk-core:0.7.5"
24
24
  compile "org.embulk:embulk-standards:0.7.5"
25
25
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
- compile group: 'org.apache.poi', name : 'poi', version: '3.13'
27
- compile group: 'org.apache.poi', name : 'poi-ooxml', version: '3.13'
26
+ compile group: 'org.apache.poi', name : 'poi', version: '3.17'
27
+ compile(group: 'org.apache.poi', name : 'poi-ooxml', version: '3.17') {
28
+ exclude group: 'stax', module: 'stax-api'
29
+ }
28
30
  testCompile "junit:junit:4.+"
29
31
  }
30
32
 
@@ -36,19 +38,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
36
38
  clean { delete "classpath" }
37
39
 
38
40
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
39
- jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
40
- script "${project.name}.gemspec"
41
+ jrubyArgs "-S"
42
+ script "gem"
43
+ scriptArgs "build", "${project.name}.gemspec"
41
44
  doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
42
45
  }
43
46
 
44
47
  task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
45
- jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
46
- script "pkg/${project.name}-${project.version}.gem"
48
+ jrubyArgs "-S"
49
+ script "gem"
50
+ scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
47
51
  }
48
52
 
49
- task "package"(dependsOn: ["gemspec", "classpath"]) << {
50
- println "> Build succeeded."
51
- println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
53
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
54
+ doLast {
55
+ println "> Build succeeded."
56
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
57
+ }
52
58
  }
53
59
 
54
60
  task gemspec {
@@ -1,6 +1,5 @@
1
- #Wed Feb 04 13:46:12 PST 2015
2
- distributionBase=GRADLE_USER_HOME
3
- distributionPath=wrapper/dists
4
- zipStoreBase=GRADLE_USER_HOME
5
- zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
1
+ distributionBase=GRADLE_USER_HOME
2
+ distributionPath=wrapper/dists
3
+ zipStoreBase=GRADLE_USER_HOME
4
+ zipStorePath=wrapper/dists
5
+ distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
data/gradlew CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env bash
1
+ #!/usr/bin/env sh
2
2
 
3
3
  ##############################################################################
4
4
  ##
@@ -6,20 +6,38 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
18
- warn ( ) {
36
+ warn () {
19
37
  echo "$*"
20
38
  }
21
39
 
22
- die ( ) {
40
+ die () {
23
41
  echo
24
42
  echo "$*"
25
43
  echo
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,31 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
- # Attempt to set APP_HOME
51
- # Resolve links: $0 may be a link
52
- PRG="$0"
53
- # Need this for relative symlinks.
54
- while [ -h "$PRG" ] ; do
55
- ls=`ls -ld "$PRG"`
56
- link=`expr "$ls" : '.*-> \(.*\)$'`
57
- if expr "$link" : '/.*' > /dev/null; then
58
- PRG="$link"
59
- else
60
- PRG=`dirname "$PRG"`"/$link"
61
- fi
62
- done
63
- SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
65
- APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
67
-
68
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
68
 
70
69
  # Determine the Java command to use to start the JVM.
@@ -90,7 +89,7 @@ location of your Java installation."
90
89
  fi
91
90
 
92
91
  # Increase the maximum file descriptors if we can.
93
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
94
93
  MAX_FD_LIMIT=`ulimit -H -n`
95
94
  if [ $? -eq 0 ] ; then
96
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -114,6 +113,7 @@ fi
114
113
  if $cygwin ; then
115
114
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
115
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
117
 
118
118
  # We build the pattern for arguments to be converted via cygpath
119
119
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -154,11 +154,19 @@ if $cygwin ; then
154
154
  esac
155
155
  fi
156
156
 
157
- # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
- function splitJvmOpts() {
159
- JVM_OPTS=("$@")
157
+ # Escape application args
158
+ save () {
159
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160
+ echo " "
160
161
  }
161
- eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
- JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
162
+ APP_ARGS=$(save "$@")
163
+
164
+ # Collect all arguments for the java command, following the shell quoting and substitution rules
165
+ eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166
+
167
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168
+ if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169
+ cd "$(dirname "$0")"
170
+ fi
163
171
 
164
- exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
172
+ exec "$JAVACMD" "$@"
@@ -8,14 +8,14 @@
8
8
  @rem Set local scope for the variables with windows NT shell
9
9
  if "%OS%"=="Windows_NT" setlocal
10
10
 
11
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
- set DEFAULT_JVM_OPTS=
13
-
14
11
  set DIRNAME=%~dp0
15
12
  if "%DIRNAME%" == "" set DIRNAME=.
16
13
  set APP_BASE_NAME=%~n0
17
14
  set APP_HOME=%DIRNAME%
18
15
 
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
19
  @rem Find java.exe
20
20
  if defined JAVA_HOME goto findJavaFromJavaHome
21
21
 
@@ -46,10 +46,9 @@ echo location of your Java installation.
46
46
  goto fail
47
47
 
48
48
  :init
49
- @rem Get command-line arguments, handling Windowz variants
49
+ @rem Get command-line arguments, handling Windows variants
50
50
 
51
51
  if not "%OS%" == "Windows_NT" goto win9xME_args
52
- if "%@eval[2+2]" == "4" goto 4NT_args
53
52
 
54
53
  :win9xME_args
55
54
  @rem Slurp the command line arguments.
@@ -60,11 +59,6 @@ set _SKIP=2
60
59
  if "x%~1" == "x" goto execute
61
60
 
62
61
  set CMD_LINE_ARGS=%*
63
- goto execute
64
-
65
- :4NT_args
66
- @rem Get arguments from the 4NT Shell from JP Software
67
- set CMD_LINE_ARGS=%$
68
62
 
69
63
  :execute
70
64
  @rem Setup the command line
@@ -11,6 +11,10 @@ public enum PoiExcelColumnValueType {
11
11
  CELL_FONT(true, false),
12
12
  /** cell comment */
13
13
  CELL_COMMENT(true, false),
14
+ /** cell type */
15
+ CELL_TYPE(true, false),
16
+ /** cell CachedFormulaResultType */
17
+ CELL_CACHED_TYPE(true, false),
14
18
  /** sheet name */
15
19
  SHEET_NAME(false, false),
16
20
  /** row number (1 origin) */
@@ -2,8 +2,11 @@ package org.embulk.parser.poi_excel;
2
2
 
3
3
  import java.io.IOException;
4
4
  import java.util.ArrayList;
5
+ import java.util.LinkedHashSet;
5
6
  import java.util.List;
6
7
  import java.util.Map;
8
+ import java.util.Set;
9
+ import java.util.regex.Pattern;
7
10
 
8
11
  import org.apache.poi.EncryptedDocumentException;
9
12
  import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@@ -93,6 +96,11 @@ public class PoiExcelParserPlugin implements ParserPlugin {
93
96
  @ConfigDefault("null")
94
97
  public Optional<String> getColumnNumber();
95
98
 
99
+ // A1,B2,... or Sheet1!A1
100
+ @Config("cell_address")
101
+ @ConfigDefault("null")
102
+ public Optional<String> getCellAddress();
103
+
96
104
  // use when value_type=cell_style, cell_font, ...
97
105
  @Config("attribute_name")
98
106
  @ConfigDefault("null")
@@ -100,11 +108,19 @@ public class PoiExcelParserPlugin implements ParserPlugin {
100
108
  }
101
109
 
102
110
  public interface ColumnCommonOptionTask extends Task {
111
+ // format of numeric(double) to string
112
+ @Config("numeric_format")
113
+ @ConfigDefault("null")
114
+ public Optional<String> getNumericFormat();
103
115
 
104
116
  // search merged cell if cellType=BLANK
105
117
  @Config("search_merged_cell")
106
118
  @ConfigDefault("null")
107
- public Optional<Boolean> getSearchMergedCell();
119
+ public Optional<String> getSearchMergedCell();
120
+
121
+ @Config("formula_handling")
122
+ @ConfigDefault("null")
123
+ public Optional<String> getFormulaHandling();
108
124
 
109
125
  @Config("formula_replace")
110
126
  @ConfigDefault("null")
@@ -166,9 +182,60 @@ public class PoiExcelParserPlugin implements ParserPlugin {
166
182
  throw new RuntimeException(e);
167
183
  }
168
184
 
169
- run(task, schema, workbook, sheetNames, output);
185
+ List<String> list = resolveSheetName(workbook, sheetNames);
186
+ if (log.isDebugEnabled()) {
187
+ log.debug("resolved sheet names={}", list);
188
+ }
189
+ run(task, schema, workbook, list, output);
190
+ }
191
+ }
192
+ }
193
+
194
+ private List<String> resolveSheetName(Workbook workbook, List<String> sheetNames) {
195
+ Set<String> set = new LinkedHashSet<>();
196
+ for (String s : sheetNames) {
197
+ if (s.contains("*") || s.contains("?")) {
198
+ int length = s.length();
199
+ StringBuilder sb = new StringBuilder(length * 2);
200
+ StringBuilder buf = new StringBuilder(32);
201
+ for (int i = 0; i < length;) {
202
+ int c = s.codePointAt(i);
203
+ switch (c) {
204
+ case '*':
205
+ if (buf.length() > 0) {
206
+ sb.append(Pattern.quote(buf.toString()));
207
+ buf.setLength(0);
208
+ }
209
+ sb.append(".*");
210
+ break;
211
+ case '?':
212
+ if (buf.length() > 0) {
213
+ sb.append(Pattern.quote(buf.toString()));
214
+ buf.setLength(0);
215
+ }
216
+ sb.append(".");
217
+ break;
218
+ default:
219
+ buf.appendCodePoint(c);
220
+ break;
221
+ }
222
+ i += Character.charCount(c);
223
+ }
224
+ if (buf.length() > 0) {
225
+ sb.append(Pattern.quote(buf.toString()));
226
+ }
227
+ String regex = sb.toString();
228
+ for (Sheet sheet : workbook) {
229
+ String name = sheet.getSheetName();
230
+ if (name.matches(regex)) {
231
+ set.add(name);
232
+ }
233
+ }
234
+ } else {
235
+ set.add(s);
170
236
  }
171
237
  }
238
+ return new ArrayList<>(set);
172
239
  }
173
240
 
174
241
  protected void run(PluginTask task, Schema schema, Workbook workbook, List<String> sheetNames, PageOutput output) {
@@ -193,18 +260,28 @@ public class PoiExcelParserPlugin implements ParserPlugin {
193
260
 
194
261
  int count = 0;
195
262
  for (Row row : sheet) {
196
- if (row.getRowNum() < skipHeaderLines) {
263
+ int rowIndex = row.getRowNum();
264
+ if (rowIndex < skipHeaderLines) {
265
+ log.debug("row({}) skipped", rowIndex);
197
266
  continue;
198
267
  }
268
+ if (log.isDebugEnabled()) {
269
+ log.debug("row({}) start", rowIndex);
270
+ }
199
271
 
200
272
  visitor.setRow(row);
201
273
  schema.visitColumns(visitor);
202
274
  pageBuilder.addRecord();
203
275
 
204
276
  if (++count >= flushCount) {
277
+ log.trace("flush");
205
278
  pageBuilder.flush();
206
279
  count = 0;
207
280
  }
281
+
282
+ if (log.isDebugEnabled()) {
283
+ log.debug("row({}) end", rowIndex);
284
+ }
208
285
  }
209
286
  pageBuilder.flush();
210
287
  }