embulk 0.8.15-java → 0.8.16-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -1
  3. data/appveyor.yml +8 -0
  4. data/build.gradle +86 -45
  5. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
  6. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
  7. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
  8. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
  9. data/embulk-docs/src/built-in.rst +34 -0
  10. data/embulk-docs/src/release.rst +1 -0
  11. data/embulk-docs/src/release/release-0.8.16.rst +43 -0
  12. data/embulk-standards/build.gradle +1 -0
  13. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
  14. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
  15. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
  16. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
  17. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
  18. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
  19. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
  20. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
  21. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
  22. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
  23. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
  24. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
  25. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
  26. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
  27. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
  28. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
  29. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
  30. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
  31. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
  32. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
  33. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
  34. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
  35. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
  36. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
  47. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
  48. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
  49. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
  50. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
  51. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
  52. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
  53. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
  54. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
  55. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
  56. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
  57. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
  58. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
  59. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
  60. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
  61. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
  62. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
  63. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
  64. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
  65. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
  66. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
  67. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
  68. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
  69. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
  70. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
  71. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
  72. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
  73. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
  74. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
  75. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
  76. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
  77. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
  78. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
  79. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
  80. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
  81. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
  82. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
  83. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
  84. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
  85. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
  86. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
  87. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
  88. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
  89. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
  90. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
  91. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
  92. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
  93. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
  94. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
  95. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
  96. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
  97. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
  98. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
  99. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
  100. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
  101. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
  102. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
  103. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  104. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  105. data/gradlew +30 -21
  106. data/gradlew.bat +4 -10
  107. data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
  108. data/lib/embulk/data/new/java/build.gradle.erb +5 -3
  109. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  110. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  111. data/lib/embulk/data/new/java/gradlew +30 -21
  112. data/lib/embulk/data/new/java/gradlew.bat +4 -10
  113. data/lib/embulk/guess/csv.rb +44 -22
  114. data/lib/embulk/guess/newline.rb +10 -4
  115. data/lib/embulk/guess_plugin.rb +3 -1
  116. data/lib/embulk/java/time_helper.rb +2 -2
  117. data/lib/embulk/version.rb +1 -1
  118. metadata +92 -5
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
data/gradlew CHANGED
@@ -6,12 +6,30 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,26 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # Attempt to set APP_HOME
46
- # Resolve links: $0 may be a link
47
- PRG="$0"
48
- # Need this for relative symlinks.
49
- while [ -h "$PRG" ] ; do
50
- ls=`ls -ld "$PRG"`
51
- link=`expr "$ls" : '.*-> \(.*\)$'`
52
- if expr "$link" : '/.*' > /dev/null; then
53
- PRG="$link"
54
- else
55
- PRG=`dirname "$PRG"`"/$link"
56
- fi
57
- done
58
- SAVED="`pwd`"
59
- cd "`dirname \"$PRG\"`/" >/dev/null
60
- APP_HOME="`pwd -P`"
61
- cd "$SAVED" >/dev/null
62
-
63
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
68
 
65
69
  # Determine the Java command to use to start the JVM.
@@ -85,7 +89,7 @@ location of your Java installation."
85
89
  fi
86
90
 
87
91
  # Increase the maximum file descriptors if we can.
88
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
89
93
  MAX_FD_LIMIT=`ulimit -H -n`
90
94
  if [ $? -eq 0 ] ; then
91
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -157,4 +161,9 @@ function splitJvmOpts() {
157
161
  eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
162
  JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
163
 
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
160
169
  exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@@ -8,14 +8,14 @@
8
8
  @rem Set local scope for the variables with windows NT shell
9
9
  if "%OS%"=="Windows_NT" setlocal
10
10
 
11
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
- set DEFAULT_JVM_OPTS=
13
-
14
11
  set DIRNAME=%~dp0
15
12
  if "%DIRNAME%" == "" set DIRNAME=.
16
13
  set APP_BASE_NAME=%~n0
17
14
  set APP_HOME=%DIRNAME%
18
15
 
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
19
  @rem Find java.exe
20
20
  if defined JAVA_HOME goto findJavaFromJavaHome
21
21
 
@@ -46,10 +46,9 @@ echo location of your Java installation.
46
46
  goto fail
47
47
 
48
48
  :init
49
- @rem Get command-line arguments, handling Windowz variants
49
+ @rem Get command-line arguments, handling Windows variants
50
50
 
51
51
  if not "%OS%" == "Windows_NT" goto win9xME_args
52
- if "%@eval[2+2]" == "4" goto 4NT_args
53
52
 
54
53
  :win9xME_args
55
54
  @rem Slurp the command line arguments.
@@ -60,11 +59,6 @@ set _SKIP=2
60
59
  if "x%~1" == "x" goto execute
61
60
 
62
61
  set CMD_LINE_ARGS=%*
63
- goto execute
64
-
65
- :4NT_args
66
- @rem Get arguments from the 4NT Shell from JP Software
67
- set CMD_LINE_ARGS=%$
68
62
 
69
63
  :execute
70
64
  @rem Setup the command line
@@ -43,8 +43,8 @@ module Embulk
43
43
  end
44
44
 
45
45
  # upgrade gradle version
46
- if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-2\.\d-/)
47
- # gradle < 2.10 (\d matches one digit)
46
+ if migrator.match("gradle/wrapper/gradle-wrapper.properties", /gradle-[23]\.\d+(\.\d+)?-/)
47
+ # gradle 2.x or 3.x (note: the pattern also matches 3.2.1 itself)
48
48
  require 'embulk/data/package_data'
49
49
  data = PackageData.new("new", migrator.path)
50
50
  migrator.write "gradle/wrapper/gradle-wrapper.properties", data.content("java/gradle/wrapper/gradle-wrapper.properties")
@@ -60,9 +60,11 @@ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
60
60
  script "pkg/${project.name}-${project.version}.gem"
61
61
  }
62
62
 
63
- task "package"(dependsOn: ["gemspec", "classpath"]) << {
64
- println "> Build succeeded."
65
- println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
63
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
64
+ doLast {
65
+ println "> Build succeeded."
66
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
67
+ }
66
68
  }
67
69
 
68
70
  task gemspec {
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -6,12 +6,30 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,26 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # Attempt to set APP_HOME
46
- # Resolve links: $0 may be a link
47
- PRG="$0"
48
- # Need this for relative symlinks.
49
- while [ -h "$PRG" ] ; do
50
- ls=`ls -ld "$PRG"`
51
- link=`expr "$ls" : '.*-> \(.*\)$'`
52
- if expr "$link" : '/.*' > /dev/null; then
53
- PRG="$link"
54
- else
55
- PRG=`dirname "$PRG"`"/$link"
56
- fi
57
- done
58
- SAVED="`pwd`"
59
- cd "`dirname \"$PRG\"`/" >/dev/null
60
- APP_HOME="`pwd -P`"
61
- cd "$SAVED" >/dev/null
62
-
63
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
68
 
65
69
  # Determine the Java command to use to start the JVM.
@@ -85,7 +89,7 @@ location of your Java installation."
85
89
  fi
86
90
 
87
91
  # Increase the maximum file descriptors if we can.
88
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
89
93
  MAX_FD_LIMIT=`ulimit -H -n`
90
94
  if [ $? -eq 0 ] ; then
91
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -157,4 +161,9 @@ function splitJvmOpts() {
157
161
  eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
162
  JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
163
 
164
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
165
+ if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
166
+ cd "$(dirname "$0")"
167
+ fi
168
+
160
169
  exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@@ -8,14 +8,14 @@
8
8
  @rem Set local scope for the variables with windows NT shell
9
9
  if "%OS%"=="Windows_NT" setlocal
10
10
 
11
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
- set DEFAULT_JVM_OPTS=
13
-
14
11
  set DIRNAME=%~dp0
15
12
  if "%DIRNAME%" == "" set DIRNAME=.
16
13
  set APP_BASE_NAME=%~n0
17
14
  set APP_HOME=%DIRNAME%
18
15
 
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
19
  @rem Find java.exe
20
20
  if defined JAVA_HOME goto findJavaFromJavaHome
21
21
 
@@ -46,10 +46,9 @@ echo location of your Java installation.
46
46
  goto fail
47
47
 
48
48
  :init
49
- @rem Get command-line arguments, handling Windowz variants
49
+ @rem Get command-line arguments, handling Windows variants
50
50
 
51
51
  if not "%OS%" == "Windows_NT" goto win9xME_args
52
- if "%@eval[2+2]" == "4" goto 4NT_args
53
52
 
54
53
  :win9xME_args
55
54
  @rem Slurp the command line arguments.
@@ -60,11 +59,6 @@ set _SKIP=2
60
59
  if "x%~1" == "x" goto execute
61
60
 
62
61
  set CMD_LINE_ARGS=%*
63
- goto execute
64
-
65
- :4NT_args
66
- @rem Get arguments from the 4NT Shell from JP Software
67
- set CMD_LINE_ARGS=%$
68
62
 
69
63
  :execute
70
64
  @rem Setup the command line
@@ -6,7 +6,7 @@ module Embulk
6
6
  Plugin.register_guess('csv', self)
7
7
 
8
8
  DELIMITER_CANDIDATES = [
9
- ",", "\t", "|"
9
+ ",", "\t", "|", ";"
10
10
  ]
11
11
 
12
12
  QUOTE_CANDIDATES = [
@@ -41,8 +41,8 @@ module Embulk
41
41
  else
42
42
  delim = guess_delimiter(sample_lines)
43
43
  unless delim
44
- # not CSV file
45
- return {}
44
+ # assuming single column CSV
45
+ delim = DELIMITER_CANDIDATES.first
46
46
  end
47
47
  end
48
48
 
@@ -104,26 +104,48 @@ module Embulk
104
104
 
105
105
  sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
106
106
 
107
- first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
108
- other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
107
+ if sample_lines.size == 1
108
+ # The file contains only 1 line. Assume that there is no header line.
109
+ header_line = false
109
110
 
110
- if first_types.size <= 1 || other_types.size <= 1
111
- # guess failed
112
- return {}
113
- end
111
+ column_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
114
112
 
115
- unless parser_guessed.has_key?("trim_if_not_quoted")
116
- sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
117
- other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
118
- if other_types != other_types_trimmed
119
- parser_guessed["trim_if_not_quoted"] = true
120
- other_types = other_types_trimmed
121
- else
122
- parser_guessed["trim_if_not_quoted"] = false
113
+ unless parser_guessed.has_key?("trim_if_not_quoted")
114
+ sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
115
+ column_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed)
116
+ if column_types != column_types_trimmed
117
+ parser_guessed["trim_if_not_quoted"] = true
118
+ column_types = column_types_trimmed
119
+ else
120
+ parser_guessed["trim_if_not_quoted"] = false
121
+ end
122
+ end
123
+ else
124
+ # The file contains more than 1 line. If guessed first line's column types are all strings or boolean, and the types are
125
+ # different from the other lines, assume that the first line is column names.
126
+ first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
127
+ other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
128
+
129
+ unless parser_guessed.has_key?("trim_if_not_quoted")
130
+ sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
131
+ other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
132
+ if other_types != other_types_trimmed
133
+ parser_guessed["trim_if_not_quoted"] = true
134
+ other_types = other_types_trimmed
135
+ else
136
+ parser_guessed["trim_if_not_quoted"] = false
137
+ end
123
138
  end
139
+
140
+ header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
141
+ column_types = other_types
124
142
  end
125
143
 
126
- header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
144
+ if column_types.empty?
145
+ # TODO: guessing currently fails if the file doesn't contain any columns. However,
146
+ # this may not be convenient for users.
147
+ return {}
148
+ end
127
149
 
128
150
  if header_line
129
151
  parser_guessed["skip_header_lines"] = skip_header_lines + 1
@@ -135,12 +157,12 @@ module Embulk
135
157
  parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
136
158
 
137
159
  if header_line
138
- column_names = sample_records.first
160
+ column_names = sample_records.first.map(&:strip)
139
161
  else
140
- column_names = (0..other_types.size).to_a.map {|i| "c#{i}" }
162
+ column_names = (0..column_types.size).to_a.map {|i| "c#{i}" }
141
163
  end
142
164
  schema = []
143
- column_names.zip(other_types).each do |name,type|
165
+ column_names.zip(column_types).each do |name,type|
144
166
  if name && type
145
167
  schema << new_column(name, type)
146
168
  end
@@ -313,7 +335,7 @@ module Embulk
313
335
  first = sample_records.first
314
336
  first.count.times do |column_index|
315
337
  lengths = sample_records.map {|row| row[column_index] }.compact.map {|v| v.to_s.size }
316
- if lengths.size > 2
338
+ if lengths.size > 1
317
339
  if array_variance(lengths[1..-1]) <= 0.2
318
340
  avg = array_avg(lengths[1..-1])
319
341
  if avg == 0.0 ? lengths[0] > 1 : (avg - lengths[0]).abs / avg > 0.7
@@ -4,10 +4,16 @@ module Embulk
4
4
  class NewlineGuessPlugin < TextGuessPlugin
5
5
  Plugin.register_guess('newline', self)
6
6
 
7
- def guess_text(config, sample_text)
8
- cr_count = sample_text.count("\r")
9
- lf_count = sample_text.count("\n")
10
- crlf_count = sample_text.scan(/\r\n/).length
7
+ def guess(config, sample)
8
+ if config.fetch('parser', {}).fetch('charset', nil).nil?
9
+ require 'embulk/guess/charset'
10
+ charset_guess = Guess::CharsetGuessPlugin.new
11
+ return charset_guess.guess(config, sample)
12
+ end
13
+
14
+ cr_count = sample.count("\r")
15
+ lf_count = sample.count("\n")
16
+ crlf_count = sample.scan(/\r\n/).length
11
17
  if crlf_count > cr_count / 2 && crlf_count > lf_count / 2
12
18
  return {"parser" => {"newline" => "CRLF"}}
13
19
  elsif cr_count > lf_count / 2
@@ -113,7 +113,9 @@ module Embulk
113
113
  while line = decoder.poll
114
114
  sample_lines << line
115
115
  end
116
- sample_lines.pop unless sample_lines.empty? # last line can be partial
116
+ unless sample.end_with?(parser_task.getNewline.getString)
117
+ sample_lines.pop unless sample_lines.empty? # last line is partial: drop it (guard keeps an empty sample untouched)
118
+ end
117
119
  end
118
120
 
119
121
  return guess_lines(config, sample_lines);