embulk 0.8.15 → 0.8.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -1
  3. data/appveyor.yml +8 -0
  4. data/build.gradle +86 -45
  5. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
  6. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
  7. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
  8. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
  9. data/embulk-docs/src/built-in.rst +34 -0
  10. data/embulk-docs/src/release.rst +1 -0
  11. data/embulk-docs/src/release/release-0.8.16.rst +43 -0
  12. data/embulk-standards/build.gradle +1 -0
  13. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
  14. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
  15. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
  16. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
  17. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
  18. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
  19. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
  20. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
  21. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
  22. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
  23. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
  24. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
  25. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
  26. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
  27. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
  28. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
  29. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
  30. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
  31. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
  32. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
  33. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
  34. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
  35. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
  36. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
  47. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
  48. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
  49. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
  50. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
  51. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
  52. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
  53. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
  54. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
  55. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
  56. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
  57. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
  58. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
  59. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
  60. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
  61. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
  62. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
  63. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
  64. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
  65. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
  66. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
  67. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
  68. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
  69. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
  70. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
  71. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
  72. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
  73. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
  74. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
  75. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
  76. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
  77. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
  78. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
  79. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
  80. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
  81. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
  82. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
  83. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
  84. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
  85. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
  86. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
  87. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
  88. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
  89. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
  90. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
  91. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
  92. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
  93. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
  94. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
  95. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
  96. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
  97. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
  98. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
  99. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
  100. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
  101. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
  102. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
  103. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  104. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  105. data/gradlew +30 -21
  106. data/gradlew.bat +4 -10
  107. data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
  108. data/lib/embulk/data/new/java/build.gradle.erb +5 -3
  109. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  110. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  111. data/lib/embulk/data/new/java/gradlew +30 -21
  112. data/lib/embulk/data/new/java/gradlew.bat +4 -10
  113. data/lib/embulk/guess/csv.rb +44 -22
  114. data/lib/embulk/guess/newline.rb +10 -4
  115. data/lib/embulk/guess_plugin.rb +3 -1
  116. data/lib/embulk/java/time_helper.rb +2 -2
  117. data/lib/embulk/version.rb +1 -1
  118. metadata +92 -5
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
data/gradlew CHANGED
@@ -6,12 +6,30 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,26 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # Attempt to set APP_HOME
46
- # Resolve links: $0 may be a link
47
- PRG="$0"
48
- # Need this for relative symlinks.
49
- while [ -h "$PRG" ] ; do
50
- ls=`ls -ld "$PRG"`
51
- link=`expr "$ls" : '.*-> \(.*\)$'`
52
- if expr "$link" : '/.*' > /dev/null; then
53
- PRG="$link"
54
- else
55
- PRG=`dirname "$PRG"`"/$link"
56
- fi
57
- done
58
- SAVED="`pwd`"
59
- cd "`dirname \"$PRG\"`/" >/dev/null
60
- APP_HOME="`pwd -P`"
61
- cd "$SAVED" >/dev/null
62
-
63
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
68
 
65
69
  # Determine the Java command to use to start the JVM.
@@ -85,7 +89,7 @@ location of your Java installation."
85
89
  fi
86
90
 
87
91
  # Increase the maximum file descriptors if we can.
88
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
89
93
  MAX_FD_LIMIT=`ulimit -H -n`
90
94
  if [ $? -eq 0 ] ; then
91
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -157,4 +161,9 @@ function splitJvmOpts() {
157
161
  eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
162
  JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
163
 
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
160
169
  exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@@ -8,14 +8,14 @@
8
8
  @rem Set local scope for the variables with windows NT shell
9
9
  if "%OS%"=="Windows_NT" setlocal
10
10
 
11
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
- set DEFAULT_JVM_OPTS=
13
-
14
11
  set DIRNAME=%~dp0
15
12
  if "%DIRNAME%" == "" set DIRNAME=.
16
13
  set APP_BASE_NAME=%~n0
17
14
  set APP_HOME=%DIRNAME%
18
15
 
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
19
  @rem Find java.exe
20
20
  if defined JAVA_HOME goto findJavaFromJavaHome
21
21
 
@@ -46,10 +46,9 @@ echo location of your Java installation.
46
46
  goto fail
47
47
 
48
48
  :init
49
- @rem Get command-line arguments, handling Windowz variants
49
+ @rem Get command-line arguments, handling Windows variants
50
50
 
51
51
  if not "%OS%" == "Windows_NT" goto win9xME_args
52
- if "%@eval[2+2]" == "4" goto 4NT_args
53
52
 
54
53
  :win9xME_args
55
54
  @rem Slurp the command line arguments.
@@ -60,11 +59,6 @@ set _SKIP=2
60
59
  if "x%~1" == "x" goto execute
61
60
 
62
61
  set CMD_LINE_ARGS=%*
63
- goto execute
64
-
65
- :4NT_args
66
- @rem Get arguments from the 4NT Shell from JP Software
67
- set CMD_LINE_ARGS=%$
68
62
 
69
63
  :execute
70
64
  @rem Setup the command line
@@ -43,8 +43,8 @@ module Embulk
43
43
  end
44
44
 
45
45
  # upgrade gradle version
46
- if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-2\.\d-/)
47
- # gradle < 2.10 (\d matches one digit)
46
+ if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-[23]\.\d+(\.\d+)?-/)
47
+ # gradle < 3.2.1
48
48
  require 'embulk/data/package_data'
49
49
  data = PackageData.new("new", migrator.path)
50
50
  migrator.write "gradle/wrapper/gradle-wrapper.properties", data.content("java/gradle/wrapper/gradle-wrapper.properties")
@@ -60,9 +60,11 @@ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
60
60
  script "pkg/${project.name}-${project.version}.gem"
61
61
  }
62
62
 
63
- task "package"(dependsOn: ["gemspec", "classpath"]) << {
64
- println "> Build succeeded."
65
- println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
63
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
64
+ doLast {
65
+ println "> Build succeeded."
66
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
67
+ }
66
68
  }
67
69
 
68
70
  task gemspec {
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -6,12 +6,30 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,26 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # Attempt to set APP_HOME
46
- # Resolve links: $0 may be a link
47
- PRG="$0"
48
- # Need this for relative symlinks.
49
- while [ -h "$PRG" ] ; do
50
- ls=`ls -ld "$PRG"`
51
- link=`expr "$ls" : '.*-> \(.*\)$'`
52
- if expr "$link" : '/.*' > /dev/null; then
53
- PRG="$link"
54
- else
55
- PRG=`dirname "$PRG"`"/$link"
56
- fi
57
- done
58
- SAVED="`pwd`"
59
- cd "`dirname \"$PRG\"`/" >/dev/null
60
- APP_HOME="`pwd -P`"
61
- cd "$SAVED" >/dev/null
62
-
63
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
68
 
65
69
  # Determine the Java command to use to start the JVM.
@@ -85,7 +89,7 @@ location of your Java installation."
85
89
  fi
86
90
 
87
91
  # Increase the maximum file descriptors if we can.
88
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
89
93
  MAX_FD_LIMIT=`ulimit -H -n`
90
94
  if [ $? -eq 0 ] ; then
91
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -157,4 +161,9 @@ function splitJvmOpts() {
157
161
  eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
162
  JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
163
 
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
160
169
  exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@@ -8,14 +8,14 @@
8
8
  @rem Set local scope for the variables with windows NT shell
9
9
  if "%OS%"=="Windows_NT" setlocal
10
10
 
11
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
- set DEFAULT_JVM_OPTS=
13
-
14
11
  set DIRNAME=%~dp0
15
12
  if "%DIRNAME%" == "" set DIRNAME=.
16
13
  set APP_BASE_NAME=%~n0
17
14
  set APP_HOME=%DIRNAME%
18
15
 
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
19
  @rem Find java.exe
20
20
  if defined JAVA_HOME goto findJavaFromJavaHome
21
21
 
@@ -46,10 +46,9 @@ echo location of your Java installation.
46
46
  goto fail
47
47
 
48
48
  :init
49
- @rem Get command-line arguments, handling Windowz variants
49
+ @rem Get command-line arguments, handling Windows variants
50
50
 
51
51
  if not "%OS%" == "Windows_NT" goto win9xME_args
52
- if "%@eval[2+2]" == "4" goto 4NT_args
53
52
 
54
53
  :win9xME_args
55
54
  @rem Slurp the command line arguments.
@@ -60,11 +59,6 @@ set _SKIP=2
60
59
  if "x%~1" == "x" goto execute
61
60
 
62
61
  set CMD_LINE_ARGS=%*
63
- goto execute
64
-
65
- :4NT_args
66
- @rem Get arguments from the 4NT Shell from JP Software
67
- set CMD_LINE_ARGS=%$
68
62
 
69
63
  :execute
70
64
  @rem Setup the command line
@@ -6,7 +6,7 @@ module Embulk
6
6
  Plugin.register_guess('csv', self)
7
7
 
8
8
  DELIMITER_CANDIDATES = [
9
- ",", "\t", "|"
9
+ ",", "\t", "|", ";"
10
10
  ]
11
11
 
12
12
  QUOTE_CANDIDATES = [
@@ -41,8 +41,8 @@ module Embulk
41
41
  else
42
42
  delim = guess_delimiter(sample_lines)
43
43
  unless delim
44
- # not CSV file
45
- return {}
44
+ # assuming single column CSV
45
+ delim = DELIMITER_CANDIDATES.first
46
46
  end
47
47
  end
48
48
 
@@ -104,26 +104,48 @@ module Embulk
104
104
 
105
105
  sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
106
106
 
107
- first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
108
- other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
107
+ if sample_lines.size == 1
108
+ # The file contains only 1 line. Assume that there is no header line.
109
+ header_line = false
109
110
 
110
- if first_types.size <= 1 || other_types.size <= 1
111
- # guess failed
112
- return {}
113
- end
111
+ column_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
114
112
 
115
- unless parser_guessed.has_key?("trim_if_not_quoted")
116
- sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
117
- other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
118
- if other_types != other_types_trimmed
119
- parser_guessed["trim_if_not_quoted"] = true
120
- other_types = other_types_trimmed
121
- else
122
- parser_guessed["trim_if_not_quoted"] = false
113
+ unless parser_guessed.has_key?("trim_if_not_quoted")
114
+ sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
115
+ column_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed)
116
+ if column_types != column_types_trimmed
117
+ parser_guessed["trim_if_not_quoted"] = true
118
+ column_types = column_types_trimmed
119
+ else
120
+ parser_guessed["trim_if_not_quoted"] = false
121
+ end
122
+ end
123
+ else
124
+ # The file contains more than 1 line. If the guessed column types of the first line are all string or boolean, and the types are
125
+ # different from the other lines, assume that the first line is column names.
126
+ first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
127
+ other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
128
+
129
+ unless parser_guessed.has_key?("trim_if_not_quoted")
130
+ sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
131
+ other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
132
+ if other_types != other_types_trimmed
133
+ parser_guessed["trim_if_not_quoted"] = true
134
+ other_types = other_types_trimmed
135
+ else
136
+ parser_guessed["trim_if_not_quoted"] = false
137
+ end
123
138
  end
139
+
140
+ header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
141
+ column_types = other_types
124
142
  end
125
143
 
126
- header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
144
+ if column_types.empty?
145
+ # TODO: this makes the guess fail if the file doesn't contain any columns. However,
146
+ # this may not be convenient for users.
147
+ return {}
148
+ end
127
149
 
128
150
  if header_line
129
151
  parser_guessed["skip_header_lines"] = skip_header_lines + 1
@@ -135,12 +157,12 @@ module Embulk
135
157
  parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
136
158
 
137
159
  if header_line
138
- column_names = sample_records.first
160
+ column_names = sample_records.first.map(&:strip)
139
161
  else
140
- column_names = (0..other_types.size).to_a.map {|i| "c#{i}" }
162
+ column_names = (0..column_types.size).to_a.map {|i| "c#{i}" }
141
163
  end
142
164
  schema = []
143
- column_names.zip(other_types).each do |name,type|
165
+ column_names.zip(column_types).each do |name,type|
144
166
  if name && type
145
167
  schema << new_column(name, type)
146
168
  end
@@ -313,7 +335,7 @@ module Embulk
313
335
  first = sample_records.first
314
336
  first.count.times do |column_index|
315
337
  lengths = sample_records.map {|row| row[column_index] }.compact.map {|v| v.to_s.size }
316
- if lengths.size > 2
338
+ if lengths.size > 1
317
339
  if array_variance(lengths[1..-1]) <= 0.2
318
340
  avg = array_avg(lengths[1..-1])
319
341
  if avg == 0.0 ? lengths[0] > 1 : (avg - lengths[0]).abs / avg > 0.7
@@ -4,10 +4,16 @@ module Embulk
4
4
  class NewlineGuessPlugin < TextGuessPlugin
5
5
  Plugin.register_guess('newline', self)
6
6
 
7
- def guess_text(config, sample_text)
8
- cr_count = sample_text.count("\r")
9
- lf_count = sample_text.count("\n")
10
- crlf_count = sample_text.scan(/\r\n/).length
7
+ def guess(config, sample)
8
+ if config.fetch('parser', {}).fetch('charset', nil).nil?
9
+ require 'embulk/guess/charset'
10
+ charset_guess = Guess::CharsetGuessPlugin.new
11
+ return charset_guess.guess(config, sample)
12
+ end
13
+
14
+ cr_count = sample.count("\r")
15
+ lf_count = sample.count("\n")
16
+ crlf_count = sample.scan(/\r\n/).length
11
17
  if crlf_count > cr_count / 2 && crlf_count > lf_count / 2
12
18
  return {"parser" => {"newline" => "CRLF"}}
13
19
  elsif cr_count > lf_count / 2
@@ -113,7 +113,9 @@ module Embulk
113
113
  while line = decoder.poll
114
114
  sample_lines << line
115
115
  end
116
- sample_lines.pop unless sample_lines.empty? # last line can be partial
116
+ unless sample.end_with?(parser_task.getNewline.getString)
117
+ sample_lines.pop unless sample_lines.empty? # last line is partial
118
+ end
117
119
  end
118
120
 
119
121
  return guess_lines(config, sample_lines);