embulk 0.8.15 → 0.8.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/appveyor.yml +8 -0
- data/build.gradle +86 -45
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
- data/embulk-docs/src/built-in.rst +34 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.16.rst +43 -0
- data/embulk-standards/build.gradle +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +30 -21
- data/gradlew.bat +4 -10
- data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
- data/lib/embulk/data/new/java/build.gradle.erb +5 -3
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +30 -21
- data/lib/embulk/data/new/java/gradlew.bat +4 -10
- data/lib/embulk/guess/csv.rb +44 -22
- data/lib/embulk/guess/newline.rb +10 -4
- data/lib/embulk/guess_plugin.rb +3 -1
- data/lib/embulk/java/time_helper.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +92 -5
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
data/gradlew
CHANGED
@@ -6,12 +6,30 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,26 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# Attempt to set APP_HOME
|
46
|
-
# Resolve links: $0 may be a link
|
47
|
-
PRG="$0"
|
48
|
-
# Need this for relative symlinks.
|
49
|
-
while [ -h "$PRG" ] ; do
|
50
|
-
ls=`ls -ld "$PRG"`
|
51
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
52
|
-
if expr "$link" : '/.*' > /dev/null; then
|
53
|
-
PRG="$link"
|
54
|
-
else
|
55
|
-
PRG=`dirname "$PRG"`"/$link"
|
56
|
-
fi
|
57
|
-
done
|
58
|
-
SAVED="`pwd`"
|
59
|
-
cd "`dirname \"$PRG\"`/" >/dev/null
|
60
|
-
APP_HOME="`pwd -P`"
|
61
|
-
cd "$SAVED" >/dev/null
|
62
|
-
|
63
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
64
68
|
|
65
69
|
# Determine the Java command to use to start the JVM.
|
@@ -85,7 +89,7 @@ location of your Java installation."
|
|
85
89
|
fi
|
86
90
|
|
87
91
|
# Increase the maximum file descriptors if we can.
|
88
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
89
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
90
94
|
if [ $? -eq 0 ] ; then
|
91
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -157,4 +161,9 @@ function splitJvmOpts() {
|
|
157
161
|
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
158
162
|
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
159
163
|
|
164
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
+
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
166
|
+
cd "$(dirname "$0")"
|
167
|
+
fi
|
168
|
+
|
160
169
|
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
data/gradlew.bat
CHANGED
@@ -8,14 +8,14 @@
|
|
8
8
|
@rem Set local scope for the variables with windows NT shell
|
9
9
|
if "%OS%"=="Windows_NT" setlocal
|
10
10
|
|
11
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
-
set DEFAULT_JVM_OPTS=
|
13
|
-
|
14
11
|
set DIRNAME=%~dp0
|
15
12
|
if "%DIRNAME%" == "" set DIRNAME=.
|
16
13
|
set APP_BASE_NAME=%~n0
|
17
14
|
set APP_HOME=%DIRNAME%
|
18
15
|
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
19
|
@rem Find java.exe
|
20
20
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
21
|
|
@@ -46,10 +46,9 @@ echo location of your Java installation.
|
|
46
46
|
goto fail
|
47
47
|
|
48
48
|
:init
|
49
|
-
@rem Get command-line arguments, handling
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
50
|
|
51
51
|
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
52
|
|
54
53
|
:win9xME_args
|
55
54
|
@rem Slurp the command line arguments.
|
@@ -60,11 +59,6 @@ set _SKIP=2
|
|
60
59
|
if "x%~1" == "x" goto execute
|
61
60
|
|
62
61
|
set CMD_LINE_ARGS=%*
|
63
|
-
goto execute
|
64
|
-
|
65
|
-
:4NT_args
|
66
|
-
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
-
set CMD_LINE_ARGS=%$
|
68
62
|
|
69
63
|
:execute
|
70
64
|
@rem Setup the command line
|
@@ -43,8 +43,8 @@ module Embulk
|
|
43
43
|
end
|
44
44
|
|
45
45
|
# upgrade gradle version
|
46
|
-
if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-
|
47
|
-
# gradle < 2.
|
46
|
+
if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-[23]\.\d+(\.\d+)?-/)
|
47
|
+
# gradle < 3.2.1
|
48
48
|
require 'embulk/data/package_data'
|
49
49
|
data = PackageData.new("new", migrator.path)
|
50
50
|
migrator.write "gradle/wrapper/gradle-wrapper.properties", data.content("java/gradle/wrapper/gradle-wrapper.properties")
|
@@ -60,9 +60,11 @@ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
|
60
60
|
script "pkg/${project.name}-${project.version}.gem"
|
61
61
|
}
|
62
62
|
|
63
|
-
task "package"(dependsOn: ["gemspec", "classpath"])
|
64
|
-
|
65
|
-
|
63
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
64
|
+
doLast {
|
65
|
+
println "> Build succeeded."
|
66
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
67
|
+
}
|
66
68
|
}
|
67
69
|
|
68
70
|
task gemspec {
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -6,12 +6,30 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,26 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# Attempt to set APP_HOME
|
46
|
-
# Resolve links: $0 may be a link
|
47
|
-
PRG="$0"
|
48
|
-
# Need this for relative symlinks.
|
49
|
-
while [ -h "$PRG" ] ; do
|
50
|
-
ls=`ls -ld "$PRG"`
|
51
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
52
|
-
if expr "$link" : '/.*' > /dev/null; then
|
53
|
-
PRG="$link"
|
54
|
-
else
|
55
|
-
PRG=`dirname "$PRG"`"/$link"
|
56
|
-
fi
|
57
|
-
done
|
58
|
-
SAVED="`pwd`"
|
59
|
-
cd "`dirname \"$PRG\"`/" >/dev/null
|
60
|
-
APP_HOME="`pwd -P`"
|
61
|
-
cd "$SAVED" >/dev/null
|
62
|
-
|
63
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
64
68
|
|
65
69
|
# Determine the Java command to use to start the JVM.
|
@@ -85,7 +89,7 @@ location of your Java installation."
|
|
85
89
|
fi
|
86
90
|
|
87
91
|
# Increase the maximum file descriptors if we can.
|
88
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
89
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
90
94
|
if [ $? -eq 0 ] ; then
|
91
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -157,4 +161,9 @@ function splitJvmOpts() {
|
|
157
161
|
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
158
162
|
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
159
163
|
|
164
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
165
|
+
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
166
|
+
cd "$(dirname "$0")"
|
167
|
+
fi
|
168
|
+
|
160
169
|
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
@@ -8,14 +8,14 @@
|
|
8
8
|
@rem Set local scope for the variables with windows NT shell
|
9
9
|
if "%OS%"=="Windows_NT" setlocal
|
10
10
|
|
11
|
-
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
-
set DEFAULT_JVM_OPTS=
|
13
|
-
|
14
11
|
set DIRNAME=%~dp0
|
15
12
|
if "%DIRNAME%" == "" set DIRNAME=.
|
16
13
|
set APP_BASE_NAME=%~n0
|
17
14
|
set APP_HOME=%DIRNAME%
|
18
15
|
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
19
|
@rem Find java.exe
|
20
20
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
21
|
|
@@ -46,10 +46,9 @@ echo location of your Java installation.
|
|
46
46
|
goto fail
|
47
47
|
|
48
48
|
:init
|
49
|
-
@rem Get command-line arguments, handling
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
50
|
|
51
51
|
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
-
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
52
|
|
54
53
|
:win9xME_args
|
55
54
|
@rem Slurp the command line arguments.
|
@@ -60,11 +59,6 @@ set _SKIP=2
|
|
60
59
|
if "x%~1" == "x" goto execute
|
61
60
|
|
62
61
|
set CMD_LINE_ARGS=%*
|
63
|
-
goto execute
|
64
|
-
|
65
|
-
:4NT_args
|
66
|
-
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
-
set CMD_LINE_ARGS=%$
|
68
62
|
|
69
63
|
:execute
|
70
64
|
@rem Setup the command line
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -6,7 +6,7 @@ module Embulk
|
|
6
6
|
Plugin.register_guess('csv', self)
|
7
7
|
|
8
8
|
DELIMITER_CANDIDATES = [
|
9
|
-
",", "\t", "|"
|
9
|
+
",", "\t", "|", ";"
|
10
10
|
]
|
11
11
|
|
12
12
|
QUOTE_CANDIDATES = [
|
@@ -41,8 +41,8 @@ module Embulk
|
|
41
41
|
else
|
42
42
|
delim = guess_delimiter(sample_lines)
|
43
43
|
unless delim
|
44
|
-
#
|
45
|
-
|
44
|
+
# assuming single column CSV
|
45
|
+
delim = DELIMITER_CANDIDATES.first
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
@@ -104,26 +104,48 @@ module Embulk
|
|
104
104
|
|
105
105
|
sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
|
106
106
|
|
107
|
-
|
108
|
-
|
107
|
+
if sample_lines.size == 1
|
108
|
+
# The file contains only 1 line. Assume that there is no header line.
|
109
|
+
header_line = false
|
109
110
|
|
110
|
-
|
111
|
-
# guess failed
|
112
|
-
return {}
|
113
|
-
end
|
111
|
+
column_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
114
112
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
113
|
+
unless parser_guessed.has_key?("trim_if_not_quoted")
|
114
|
+
sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
|
115
|
+
column_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed)
|
116
|
+
if column_types != column_types_trimmed
|
117
|
+
parser_guessed["trim_if_not_quoted"] = true
|
118
|
+
column_types = column_types_trimmed
|
119
|
+
else
|
120
|
+
parser_guessed["trim_if_not_quoted"] = false
|
121
|
+
end
|
122
|
+
end
|
123
|
+
else
|
124
|
+
# The file contains more than 1 line. If the guessed column types of the first line are all string or boolean, and the types are
|
125
|
+
# different from the other lines, assume that the first line is column names.
|
126
|
+
first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
127
|
+
other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
|
128
|
+
|
129
|
+
unless parser_guessed.has_key?("trim_if_not_quoted")
|
130
|
+
sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
|
131
|
+
other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
|
132
|
+
if other_types != other_types_trimmed
|
133
|
+
parser_guessed["trim_if_not_quoted"] = true
|
134
|
+
other_types = other_types_trimmed
|
135
|
+
else
|
136
|
+
parser_guessed["trim_if_not_quoted"] = false
|
137
|
+
end
|
123
138
|
end
|
139
|
+
|
140
|
+
header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
|
141
|
+
column_types = other_types
|
124
142
|
end
|
125
143
|
|
126
|
-
|
144
|
+
if column_types.empty?
|
145
|
+
# TODO: guessing currently fails here if the file doesn't contain any columns. However,
|
146
|
+
# this may not be convenient for users.
|
147
|
+
return {}
|
148
|
+
end
|
127
149
|
|
128
150
|
if header_line
|
129
151
|
parser_guessed["skip_header_lines"] = skip_header_lines + 1
|
@@ -135,12 +157,12 @@ module Embulk
|
|
135
157
|
parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
|
136
158
|
|
137
159
|
if header_line
|
138
|
-
column_names = sample_records.first
|
160
|
+
column_names = sample_records.first.map(&:strip)
|
139
161
|
else
|
140
|
-
column_names = (0..
|
162
|
+
column_names = (0..column_types.size).to_a.map {|i| "c#{i}" }
|
141
163
|
end
|
142
164
|
schema = []
|
143
|
-
column_names.zip(
|
165
|
+
column_names.zip(column_types).each do |name,type|
|
144
166
|
if name && type
|
145
167
|
schema << new_column(name, type)
|
146
168
|
end
|
@@ -313,7 +335,7 @@ module Embulk
|
|
313
335
|
first = sample_records.first
|
314
336
|
first.count.times do |column_index|
|
315
337
|
lengths = sample_records.map {|row| row[column_index] }.compact.map {|v| v.to_s.size }
|
316
|
-
if lengths.size >
|
338
|
+
if lengths.size > 1
|
317
339
|
if array_variance(lengths[1..-1]) <= 0.2
|
318
340
|
avg = array_avg(lengths[1..-1])
|
319
341
|
if avg == 0.0 ? lengths[0] > 1 : (avg - lengths[0]).abs / avg > 0.7
|
data/lib/embulk/guess/newline.rb
CHANGED
@@ -4,10 +4,16 @@ module Embulk
|
|
4
4
|
class NewlineGuessPlugin < TextGuessPlugin
|
5
5
|
Plugin.register_guess('newline', self)
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def guess(config, sample)
|
8
|
+
if config.fetch('parser', {}).fetch('charset', nil).nil?
|
9
|
+
require 'embulk/guess/charset'
|
10
|
+
charset_guess = Guess::CharsetGuessPlugin.new
|
11
|
+
return charset_guess.guess(config, sample)
|
12
|
+
end
|
13
|
+
|
14
|
+
cr_count = sample.count("\r")
|
15
|
+
lf_count = sample.count("\n")
|
16
|
+
crlf_count = sample.scan(/\r\n/).length
|
11
17
|
if crlf_count > cr_count / 2 && crlf_count > lf_count / 2
|
12
18
|
return {"parser" => {"newline" => "CRLF"}}
|
13
19
|
elsif cr_count > lf_count / 2
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -113,7 +113,9 @@ module Embulk
|
|
113
113
|
while line = decoder.poll
|
114
114
|
sample_lines << line
|
115
115
|
end
|
116
|
-
|
116
|
+
unless sample.end_with?(parser_task.getNewline.getString)
|
117
|
+
sample_lines.pop unless sample_lines.empty? # last line is partial
|
118
|
+
end
|
117
119
|
end
|
118
120
|
|
119
121
|
return guess_lines(config, sample_lines);
|