embulk 0.8.15-java → 0.8.16-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/appveyor.yml +8 -0
- data/build.gradle +86 -45
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
- data/embulk-docs/src/built-in.rst +34 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.16.rst +43 -0
- data/embulk-standards/build.gradle +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +30 -21
- data/gradlew.bat +4 -10
- data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
- data/lib/embulk/data/new/java/build.gradle.erb +5 -3
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +30 -21
- data/lib/embulk/data/new/java/gradlew.bat +4 -10
- data/lib/embulk/guess/csv.rb +44 -22
- data/lib/embulk/guess/newline.rb +10 -4
- data/lib/embulk/guess_plugin.rb +3 -1
- data/lib/embulk/java/time_helper.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +92 -5
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
data/gradlew
CHANGED
@@ -6,12 +6,30 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,26 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# Attempt to set APP_HOME
|
46
|
-
# Resolve links: $0 may be a link
|
47
|
-
PRG="$0"
|
48
|
-
# Need this for relative symlinks.
|
49
|
-
while [ -h "$PRG" ] ; do
|
50
|
-
ls=`ls -ld "$PRG"`
|
51
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
52
|
-
if expr "$link" : '/.*' > /dev/null; then
|
53
|
-
PRG="$link"
|
54
|
-
else
|
55
|
-
PRG=`dirname "$PRG"`"/$link"
|
56
|
-
fi
|
57
|
-
done
|
58
|
-
SAVED="`pwd`"
|
59
|
-
cd "`dirname \"$PRG\"`/" >/dev/null
|
60
|
-
APP_HOME="`pwd -P`"
|
61
|
-
cd "$SAVED" >/dev/null
|
62
|
-
|
63
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
64
68
|
|
65
69
|
# Determine the Java command to use to start the JVM.
|
@@ -85,7 +89,7 @@ location of your Java installation."
|
|
85
89
|
fi
|
86
90
|
|
87
91
|
# Increase the maximum file descriptors if we can.
|
88
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
89
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
90
94
|
if [ $? -eq 0 ] ; then
|
91
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -157,4 +161,9 @@ function splitJvmOpts() {
|
|
157
161
|
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
158
162
|
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
159
163
|
|
164
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
+
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
166
|
+
cd "$(dirname "$0")"
|
167
|
+
fi
|
168
|
+
|
160
169
|
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
data/gradlew.bat
CHANGED
@@ -8,14 +8,14 @@
|
|
8
8
|
@rem Set local scope for the variables with windows NT shell
|
9
9
|
if "%OS%"=="Windows_NT" setlocal
|
10
10
|
|
11
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
-
set DEFAULT_JVM_OPTS=
|
13
|
-
|
14
11
|
set DIRNAME=%~dp0
|
15
12
|
if "%DIRNAME%" == "" set DIRNAME=.
|
16
13
|
set APP_BASE_NAME=%~n0
|
17
14
|
set APP_HOME=%DIRNAME%
|
18
15
|
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
19
|
@rem Find java.exe
|
20
20
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
21
|
|
@@ -46,10 +46,9 @@ echo location of your Java installation.
|
|
46
46
|
goto fail
|
47
47
|
|
48
48
|
:init
|
49
|
-
@rem Get command-line arguments, handling
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
50
|
|
51
51
|
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
52
|
|
54
53
|
:win9xME_args
|
55
54
|
@rem Slurp the command line arguments.
|
@@ -60,11 +59,6 @@ set _SKIP=2
|
|
60
59
|
if "x%~1" == "x" goto execute
|
61
60
|
|
62
61
|
set CMD_LINE_ARGS=%*
|
63
|
-
goto execute
|
64
|
-
|
65
|
-
:4NT_args
|
66
|
-
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
-
set CMD_LINE_ARGS=%$
|
68
62
|
|
69
63
|
:execute
|
70
64
|
@rem Setup the command line
|
@@ -43,8 +43,8 @@ module Embulk
|
|
43
43
|
end
|
44
44
|
|
45
45
|
# upgrade gradle version
|
46
|
-
if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-
|
47
|
-
# gradle < 2.
|
46
|
+
if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-[23]\.\d+(\.\d+)?-/)
|
47
|
+
# gradle < 3.2.1
|
48
48
|
require 'embulk/data/package_data'
|
49
49
|
data = PackageData.new("new", migrator.path)
|
50
50
|
migrator.write "gradle/wrapper/gradle-wrapper.properties", data.content("java/gradle/wrapper/gradle-wrapper.properties")
|
@@ -60,9 +60,11 @@ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
|
60
60
|
script "pkg/${project.name}-${project.version}.gem"
|
61
61
|
}
|
62
62
|
|
63
|
-
task "package"(dependsOn: ["gemspec", "classpath"])
|
64
|
-
|
65
|
-
|
63
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
64
|
+
doLast {
|
65
|
+
println "> Build succeeded."
|
66
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
67
|
+
}
|
66
68
|
}
|
67
69
|
|
68
70
|
task gemspec {
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -6,12 +6,30 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,26 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# Attempt to set APP_HOME
|
46
|
-
# Resolve links: $0 may be a link
|
47
|
-
PRG="$0"
|
48
|
-
# Need this for relative symlinks.
|
49
|
-
while [ -h "$PRG" ] ; do
|
50
|
-
ls=`ls -ld "$PRG"`
|
51
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
52
|
-
if expr "$link" : '/.*' > /dev/null; then
|
53
|
-
PRG="$link"
|
54
|
-
else
|
55
|
-
PRG=`dirname "$PRG"`"/$link"
|
56
|
-
fi
|
57
|
-
done
|
58
|
-
SAVED="`pwd`"
|
59
|
-
cd "`dirname \"$PRG\"`/" >/dev/null
|
60
|
-
APP_HOME="`pwd -P`"
|
61
|
-
cd "$SAVED" >/dev/null
|
62
|
-
|
63
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
64
68
|
|
65
69
|
# Determine the Java command to use to start the JVM.
|
@@ -85,7 +89,7 @@ location of your Java installation."
|
|
85
89
|
fi
|
86
90
|
|
87
91
|
# Increase the maximum file descriptors if we can.
|
88
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
89
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
90
94
|
if [ $? -eq 0 ] ; then
|
91
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -157,4 +161,9 @@ function splitJvmOpts() {
|
|
157
161
|
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
158
162
|
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
159
163
|
|
164
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
+
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
166
|
+
cd "$(dirname "$0")"
|
167
|
+
fi
|
168
|
+
|
160
169
|
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
@@ -8,14 +8,14 @@
|
|
8
8
|
@rem Set local scope for the variables with windows NT shell
|
9
9
|
if "%OS%"=="Windows_NT" setlocal
|
10
10
|
|
11
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
-
set DEFAULT_JVM_OPTS=
|
13
|
-
|
14
11
|
set DIRNAME=%~dp0
|
15
12
|
if "%DIRNAME%" == "" set DIRNAME=.
|
16
13
|
set APP_BASE_NAME=%~n0
|
17
14
|
set APP_HOME=%DIRNAME%
|
18
15
|
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
19
|
@rem Find java.exe
|
20
20
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
21
|
|
@@ -46,10 +46,9 @@ echo location of your Java installation.
|
|
46
46
|
goto fail
|
47
47
|
|
48
48
|
:init
|
49
|
-
@rem Get command-line arguments, handling
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
50
|
|
51
51
|
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
52
|
|
54
53
|
:win9xME_args
|
55
54
|
@rem Slurp the command line arguments.
|
@@ -60,11 +59,6 @@ set _SKIP=2
|
|
60
59
|
if "x%~1" == "x" goto execute
|
61
60
|
|
62
61
|
set CMD_LINE_ARGS=%*
|
63
|
-
goto execute
|
64
|
-
|
65
|
-
:4NT_args
|
66
|
-
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
-
set CMD_LINE_ARGS=%$
|
68
62
|
|
69
63
|
:execute
|
70
64
|
@rem Setup the command line
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -6,7 +6,7 @@ module Embulk
|
|
6
6
|
Plugin.register_guess('csv', self)
|
7
7
|
|
8
8
|
DELIMITER_CANDIDATES = [
|
9
|
-
",", "\t", "|"
|
9
|
+
",", "\t", "|", ";"
|
10
10
|
]
|
11
11
|
|
12
12
|
QUOTE_CANDIDATES = [
|
@@ -41,8 +41,8 @@ module Embulk
|
|
41
41
|
else
|
42
42
|
delim = guess_delimiter(sample_lines)
|
43
43
|
unless delim
|
44
|
-
#
|
45
|
-
|
44
|
+
# assuming single column CSV
|
45
|
+
delim = DELIMITER_CANDIDATES.first
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
@@ -104,26 +104,48 @@ module Embulk
|
|
104
104
|
|
105
105
|
sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
|
106
106
|
|
107
|
-
|
108
|
-
|
107
|
+
if sample_lines.size == 1
|
108
|
+
# The file contains only 1 line. Assume that there is no header line.
|
109
|
+
header_line = false
|
109
110
|
|
110
|
-
|
111
|
-
# guess failed
|
112
|
-
return {}
|
113
|
-
end
|
111
|
+
column_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
114
112
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
113
|
+
unless parser_guessed.has_key?("trim_if_not_quoted")
|
114
|
+
sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
|
115
|
+
column_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed)
|
116
|
+
if column_types != column_types_trimmed
|
117
|
+
parser_guessed["trim_if_not_quoted"] = true
|
118
|
+
column_types = column_types_trimmed
|
119
|
+
else
|
120
|
+
parser_guessed["trim_if_not_quoted"] = false
|
121
|
+
end
|
122
|
+
end
|
123
|
+
else
|
124
|
+
# The file contains more than 1 line. If guessed first line's column types are all strings or boolean, and the types are
|
125
|
+
# different from the other lines, assume that the first line is column names.
|
126
|
+
first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
127
|
+
other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
|
128
|
+
|
129
|
+
unless parser_guessed.has_key?("trim_if_not_quoted")
|
130
|
+
sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
|
131
|
+
other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
|
132
|
+
if other_types != other_types_trimmed
|
133
|
+
parser_guessed["trim_if_not_quoted"] = true
|
134
|
+
other_types = other_types_trimmed
|
135
|
+
else
|
136
|
+
parser_guessed["trim_if_not_quoted"] = false
|
137
|
+
end
|
123
138
|
end
|
139
|
+
|
140
|
+
header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
|
141
|
+
column_types = other_types
|
124
142
|
end
|
125
143
|
|
126
|
-
|
144
|
+
if column_types.empty?
|
145
|
+
# TODO: guessing currently fails here if the file doesn't contain any columns. However,
|
146
|
+
# this may not be convenient for users.
|
147
|
+
return {}
|
148
|
+
end
|
127
149
|
|
128
150
|
if header_line
|
129
151
|
parser_guessed["skip_header_lines"] = skip_header_lines + 1
|
@@ -135,12 +157,12 @@ module Embulk
|
|
135
157
|
parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
|
136
158
|
|
137
159
|
if header_line
|
138
|
-
column_names = sample_records.first
|
160
|
+
column_names = sample_records.first.map(&:strip)
|
139
161
|
else
|
140
|
-
column_names = (0..
|
162
|
+
column_names = (0...column_types.size).to_a.map {|i| "c#{i}" } # exclusive range: exactly one generated name (c0, c1, ...) per guessed column type
|
141
163
|
end
|
142
164
|
schema = []
|
143
|
-
column_names.zip(
|
165
|
+
column_names.zip(column_types).each do |name,type|
|
144
166
|
if name && type
|
145
167
|
schema << new_column(name, type)
|
146
168
|
end
|
@@ -313,7 +335,7 @@ module Embulk
|
|
313
335
|
first = sample_records.first
|
314
336
|
first.count.times do |column_index|
|
315
337
|
lengths = sample_records.map {|row| row[column_index] }.compact.map {|v| v.to_s.size }
|
316
|
-
if lengths.size >
|
338
|
+
if lengths.size > 1
|
317
339
|
if array_variance(lengths[1..-1]) <= 0.2
|
318
340
|
avg = array_avg(lengths[1..-1])
|
319
341
|
if avg == 0.0 ? lengths[0] > 1 : (avg - lengths[0]).abs / avg > 0.7
|
data/lib/embulk/guess/newline.rb
CHANGED
@@ -4,10 +4,16 @@ module Embulk
|
|
4
4
|
class NewlineGuessPlugin < TextGuessPlugin
|
5
5
|
Plugin.register_guess('newline', self)
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def guess(config, sample)
|
8
|
+
if config.fetch('parser', {}).fetch('charset', nil).nil?
|
9
|
+
require 'embulk/guess/charset'
|
10
|
+
charset_guess = Guess::CharsetGuessPlugin.new
|
11
|
+
return charset_guess.guess(config, sample)
|
12
|
+
end
|
13
|
+
|
14
|
+
cr_count = sample.count("\r")
|
15
|
+
lf_count = sample.count("\n")
|
16
|
+
crlf_count = sample.scan(/\r\n/).length
|
11
17
|
if crlf_count > cr_count / 2 && crlf_count > lf_count / 2
|
12
18
|
return {"parser" => {"newline" => "CRLF"}}
|
13
19
|
elsif cr_count > lf_count / 2
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -113,7 +113,9 @@ module Embulk
|
|
113
113
|
while line = decoder.poll
|
114
114
|
sample_lines << line
|
115
115
|
end
|
116
|
-
|
116
|
+
unless sample.end_with?(parser_task.getNewline.getString)
|
117
|
+
sample_lines.pop unless sample_lines.empty? # sample did not end with a newline, so the last line is partial: drop it
|
118
|
+
end
|
117
119
|
end
|
118
120
|
|
119
121
|
return guess_lines(config, sample_lines);
|