embulk 0.8.15 → 0.8.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -1
  3. data/appveyor.yml +8 -0
  4. data/build.gradle +86 -45
  5. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
  6. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
  7. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
  8. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
  9. data/embulk-docs/src/built-in.rst +34 -0
  10. data/embulk-docs/src/release.rst +1 -0
  11. data/embulk-docs/src/release/release-0.8.16.rst +43 -0
  12. data/embulk-standards/build.gradle +1 -0
  13. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
  14. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
  15. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
  16. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
  17. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
  18. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
  19. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
  20. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
  21. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
  22. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
  23. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
  24. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
  25. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
  26. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
  27. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
  28. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
  29. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
  30. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
  31. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
  32. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
  33. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
  34. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
  35. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
  36. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
  47. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
  48. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
  49. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
  50. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
  51. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
  52. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
  53. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
  54. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
  55. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
  56. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
  57. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
  58. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
  59. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
  60. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
  61. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
  62. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
  63. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
  64. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
  65. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
  66. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
  67. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
  68. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
  69. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
  70. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
  71. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
  72. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
  73. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
  74. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
  75. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
  76. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
  77. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
  78. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
  79. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
  80. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
  81. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
  82. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
  83. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
  84. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
  85. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
  86. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
  87. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
  88. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
  89. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
  90. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
  91. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
  92. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
  93. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
  94. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
  95. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
  96. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
  97. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
  98. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
  99. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
  100. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
  101. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
  102. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
  103. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  104. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  105. data/gradlew +30 -21
  106. data/gradlew.bat +4 -10
  107. data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
  108. data/lib/embulk/data/new/java/build.gradle.erb +5 -3
  109. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  110. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  111. data/lib/embulk/data/new/java/gradlew +30 -21
  112. data/lib/embulk/data/new/java/gradlew.bat +4 -10
  113. data/lib/embulk/guess/csv.rb +44 -22
  114. data/lib/embulk/guess/newline.rb +10 -4
  115. data/lib/embulk/guess_plugin.rb +3 -1
  116. data/lib/embulk/java/time_helper.rb +2 -2
  117. data/lib/embulk/version.rb +1 -1
  118. metadata +92 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c89d18ccf5e8011ff1071e37d9fa0016bd97f179
4
- data.tar.gz: d1137ad2c115b1bc006c5669e4c9a69aeb82a21d
3
+ metadata.gz: 8343679bf498ccc8d1519185737a9c54ced0d669
4
+ data.tar.gz: 3b737c7e3886c6915f503bd2960efb993ac3c4e6
5
5
  SHA512:
6
- metadata.gz: 5da8fb3f31164ea935cb28a437f6dde726b44443ce73aa1274bb39a72d98f465126c58ebc5c49499a1aa18858a166532e1625a38a8699eba0ccd48f7ff13f4f8
7
- data.tar.gz: e23084c691a1493e662149233c59acd1a6226413de86651c4c97348cb6921ed81d31808cdef4a664111dc86d41a851441562159c8fee0c966cc805dc5a80dbe1
6
+ metadata.gz: 5e11e26e3e9025d669a9cb9761d883e9470bd67e52a4019aa6d2b08b6ae2c44822fdea0be435862ebce21c7267ba2e08293e3e03441297e063b220a9fe9a29a6
7
+ data.tar.gz: 31b5b9f4f008951efbec4b84e75a823c4832d2ec5bd495bb4024c97687160008566aed4dbccfe25638da18995e3b46da4e885cd942e2e29e02709c672b56b694
data/README.md CHANGED
@@ -72,6 +72,7 @@ Embulk bundles some built-in plugins such as `embulk-encoder-gzip` or `embulk-fo
72
72
  in:
73
73
  type: file
74
74
  path_prefix: "./try1/csv/sample_"
75
+ ...
75
76
  out:
76
77
  type: command
77
78
  command: "cat - > task.$INDEX.$SEQID.csv.gz"
@@ -199,7 +200,7 @@ bintray_api_key=(bintray api key)
199
200
  Run following commands and follow its instruction:
200
201
 
201
202
  ```
202
- ./gradlew set_version -Pto=$VERSION
203
+ ./gradlew setVersion -Pto=$VERSION
203
204
  ```
204
205
 
205
206
  ```
@@ -3,7 +3,15 @@ version: "{branch} {build}"
3
3
  build:
4
4
  verbosity: detailed
5
5
 
6
+ install:
7
+ - ps: $zipPath = "$($env:USERPROFILE)\Handle.zip"
8
+ - ps: (New-Object Net.WebClient).DownloadFile('https://download.sysinternals.com/files/Handle.zip', $zipPath)
9
+ - ps: 7z x $zipPath -y -o"$env:APPVEYOR_BUILD_FOLDER" | Out-Null
10
+
11
+ # To avoid call-selfrun.bat FileNotFound exception, it installs and uses handle.exe as workaround provided by AppVeyor support.
12
+ # see http://help.appveyor.com/discussions/problems/5975-the-process-cannot-access-the-file-because-it-is-being-used-by-another-process
6
13
  build_script:
14
+ - handle.exe -a -u C:\projects\embulk\embulk-cli\build\classes\test\org\embulk\cli\call-selfrun.bat -nobanner
7
15
  - gradlew.bat --info --no-daemon check rubyTest
8
16
 
9
17
  cache:
@@ -4,7 +4,7 @@ buildscript {
4
4
  classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.3.1'
5
5
  classpath "com.github.jruby-gradle:jruby-gradle-jar-plugin:1.0.1"
6
6
  classpath 'com.github.ben-manes:gradle-versions-plugin:0.11.3'
7
- classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.2'
7
+ classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.3'
8
8
  }
9
9
  }
10
10
  apply plugin: "com.github.jruby-gradle.jar"
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards"), p
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.15'
19
+ version = '0.8.16'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.1.5.0'
@@ -151,7 +151,7 @@ subprojects {
151
151
  }
152
152
 
153
153
  task wrapper(type: Wrapper) {
154
- gradleVersion = '2.6'
154
+ gradleVersion = '3.2.1'
155
155
  }
156
156
 
157
157
  import com.github.jrubygradle.JRubyExec
@@ -201,19 +201,22 @@ task updateResources(dependsOn: 'classpath') {
201
201
  //
202
202
  // classpath task
203
203
  //
204
- task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
204
+ task classpath(dependsOn: ['build', ':embulk-cli:classpath']) { doLast {} }
205
+
205
206
  clean { delete 'classpath' }
206
207
 
207
208
  //
208
209
  // cli task
209
210
  //
210
- task cli(dependsOn: ':embulk-cli:shadowJar') << {
211
- file('pkg').mkdirs()
212
- File f = file("pkg/embulk-${project.version}.jar")
213
- f.write("")
214
- f.append(file("embulk-cli/src/main/sh/selfrun.sh").readBytes())
215
- f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
216
- f.setExecutable(true)
211
+ task cli(dependsOn: ':embulk-cli:shadowJar') {
212
+ doLast {
213
+ file('pkg').mkdirs()
214
+ File f = file("pkg/embulk-${project.version}.jar")
215
+ f.write("")
216
+ f.append(file("embulk-cli/src/main/sh/selfrun.sh").readBytes())
217
+ f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
218
+ f.setExecutable(true)
219
+ }
217
220
  }
218
221
  bintrayUpload.dependsOn(['cli'])
219
222
 
@@ -282,58 +285,96 @@ task rubyGemsUpload(type: Exec, dependsOn: ["gem", "rubyGemsUploadJRuby"]) {
282
285
  //
283
286
  // releaseCheck and release tasks
284
287
  //
285
- task releaseCheck << {
286
- if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
287
- throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
288
- }
289
- if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains("${project.version}")) {
290
- throw new GradleException("Release note for ${project.version} doesn't exist")
291
- }
292
- if (!file("embulk-docs/src/release.rst").getText().contains("release-${project.version}")) {
293
- throw new GradleException("embulk-docs/src/release.rst doesn't include release-${project.version}")
294
- }
295
- String date = new Date().format("yyyy-MM-dd")
296
- if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
297
- throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
288
+ task releaseCheck {
289
+ doLast {
290
+ if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
291
+ throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
292
+ }
293
+ if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains("${project.version}")) {
294
+ throw new GradleException("Release note for ${project.version} doesn't exist")
295
+ }
296
+ if (!file("embulk-docs/src/release.rst").getText().contains("release-${project.version}")) {
297
+ throw new GradleException("embulk-docs/src/release.rst doesn't include release-${project.version}")
298
+ }
299
+ String date = new Date().format("yyyy-MM-dd")
300
+ if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
301
+ throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
302
+ }
303
+ // TODO check git-ls-files includes release-<version>.rst file
304
+ println "Ready. Run 'release' task."
298
305
  }
299
- // TODO check git-ls-files includes release-<version>.rst file
300
- println "Ready. Run 'release' task."
301
306
  }
302
307
 
303
- task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"]) << {
304
- println """
308
+ task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"]) {
309
+ doLast {
310
+ println """
305
311
  Manual operations:
306
312
 
307
313
  git commit -am v${project.version}
308
314
  git tag v${project.version}
309
315
 
310
316
  """
317
+ }
311
318
  }
312
319
  bintrayUpload.mustRunAfter('releaseCheck')
313
320
  rubyGemsUpload.mustRunAfter('releaseCheck')
314
321
  rubyGemsUploadJRuby.mustRunAfter('releaseCheck')
315
322
 
316
- task setVersion << {
317
- if (!project.hasProperty("to")) {
318
- throw new GradleException("Usage: ./gradlew setVersion -Pto=VERSION")
319
- }
323
+ task setVersion {
324
+ doLast {
325
+ if (!project.hasProperty("to")) {
326
+ throw new GradleException("Usage: ./gradlew setVersion -Pto=VERSION")
327
+ }
328
+
329
+ File gradle_ver = file('build.gradle')
330
+ gradle_ver.write(gradle_ver.getText().replaceFirst("version = '(\\d+)(\\.\\d+){2}'", "version = '${to}'"))
320
331
 
321
- File gradle_ver = file('build.gradle')
322
- gradle_ver.write(gradle_ver.getText().replaceFirst("version = '(\\d+)(\\.\\d+){2}'", "version = '${to}'"))
332
+ File ruby_ver = file('lib/embulk/version.rb')
333
+ ruby_ver.write(ruby_ver.getText().replaceFirst("VERSION = '(\\d+)(\\.\\d+){2}'", "VERSION = '${to}'"))
334
+
335
+ List<String> docs = [
336
+ 'README.md',
337
+ ]
338
+ docs.each() { path ->
339
+ File doc = file(path)
340
+ doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
341
+ }
323
342
 
324
- File ruby_ver = file('lib/embulk/version.rb')
325
- ruby_ver.write(ruby_ver.getText().replaceFirst("VERSION = '(\\d+)(\\.\\d+){2}'", "VERSION = '${to}'"))
343
+ file("embulk-docs/src/release/release-${to}.rst").append("")
344
+ "git add embulk-docs/src/release/release-${to}.rst".execute().waitFor()
326
345
 
327
- List<String> docs = [
328
- 'README.md',
329
- ]
330
- docs.each() { path ->
331
- File doc = file(path)
332
- doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
346
+ println "add 'release/release-${to}' line to embulk-docs/src/release.rst"
333
347
  }
348
+ }
349
+
350
+ task updateJRuby {
351
+ doLast {
352
+ if (!project.hasProperty("to")) {
353
+ throw new GradleException("Usage: ./gradlew updateJRuby -Pto=VERSION")
354
+ }
334
355
 
335
- file("embulk-docs/src/release/release-${to}.rst").append("")
336
- "git add embulk-docs/src/release/release-${to}.rst".execute().waitFor()
356
+ File gradle_ver = file('build.gradle')
357
+ gradle_ver.write(gradle_ver.getText().replaceFirst("jrubyVersion = '(\\d+)(\\.\\d+){3}'", "jrubyVersion = '${to}'"))
337
358
 
338
- println "add 'release/release-${to}' line to embulk-docs/src/release.rst"
359
+ File gemspec_ver = file('embulk.gemspec')
360
+ gemspec_ver.write(gemspec_ver.getText().replaceFirst("gem.add_dependency \"jruby-jars\", '= (\\d+)(\\.\\d+){3}'", "gem.add_dependency \"jruby-jars\", '= ${to}'"))
361
+
362
+ File migrate_plugin_ver = file('lib/embulk/command/embulk_migrate_plugin.rb')
363
+ migrate_plugin_ver.write(migrate_plugin_ver.getText().replaceFirst("\".ruby-version\", \"jruby-(\\d+)(\\.\\d+){3}\"", "\".ruby-version\", \"jruby-${to}\""))
364
+
365
+ File gemfile_lock_ver = file('Gemfile.lock')
366
+ gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\(= (\\d+)(\\.\\d+){3}\\)", "jruby-jars (= ${to})"))
367
+
368
+ gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\((\\d+)(\\.\\d+){3}\\)", "jruby-jars (${to})"))
369
+
370
+ List<String> dot_ruby_vers = [
371
+ 'lib/embulk/data/bundle/.ruby-version',
372
+ 'lib/embulk/data/new/ruby/.ruby-version'
373
+ ]
374
+ dot_ruby_vers.each() { path ->
375
+ File dot_ruby_ver = file(path)
376
+ dot_ruby_ver.write(dot_ruby_ver.getText().replaceAll('jruby-(\\d+)(\\.\\d+){3}', "jruby-${to}"))
377
+ }
378
+
379
+ }
339
380
  }
@@ -4,7 +4,7 @@ import java.util.Set;
4
4
  import javax.validation.ConstraintViolation;
5
5
 
6
6
  public class TaskValidationException
7
- extends RuntimeException
7
+ extends ConfigException
8
8
  {
9
9
  @SuppressWarnings("unchecked")
10
10
  private final Set violations;
@@ -37,6 +37,8 @@ public class SamplingParserPlugin
37
37
  if (taskCount == 0) {
38
38
  throw new NoSampleException("No input files to read sample data");
39
39
  }
40
+ int maxSize = -1;
41
+ int maxSizeTaskIndex = -1;
40
42
  for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
41
43
  try {
42
44
  runner.run(taskSource, schema, taskIndex, new PageOutput() {
@@ -51,10 +53,33 @@ public class SamplingParserPlugin
51
53
  public void close() { }
52
54
  });
53
55
  } catch (NotEnoughSampleError ex) {
56
+ if (maxSize < ex.getSize()) {
57
+ maxSize = ex.getSize();
58
+ maxSizeTaskIndex = taskIndex;
59
+ }
54
60
  continue;
55
61
  }
56
62
  }
57
- throw new NoSampleException("All input files are smaller than minimum sampling size"); // TODO include minSampleSize in message
63
+ if (maxSize <= 0) {
64
+ throw new NoSampleException("All input files are empty");
65
+ }
66
+ taskSource.getNested("ParserTaskSource").set("force", true);
67
+ try {
68
+ runner.run(taskSource, schema, maxSizeTaskIndex, new PageOutput() {
69
+ @Override
70
+ public void add(Page page)
71
+ {
72
+ throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
73
+ }
74
+
75
+ public void finish() { }
76
+
77
+ public void close() { }
78
+ });
79
+ } catch (NotEnoughSampleError ex) {
80
+ throw new NoSampleException("All input files are smaller than minimum sampling size");
81
+ }
82
+ throw new NoSampleException("All input files are smaller than minimum sampling size");
58
83
  }
59
84
  });
60
85
  throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
@@ -81,7 +106,19 @@ public class SamplingParserPlugin
81
106
 
82
107
  public static class NotEnoughSampleError
83
108
  extends Error
84
- { }
109
+ {
110
+ private final int size;
111
+
112
+ public NotEnoughSampleError(int size)
113
+ {
114
+ this.size = size;
115
+ }
116
+
117
+ public int getSize()
118
+ {
119
+ return size;
120
+ }
121
+ }
85
122
 
86
123
  private final int minSampleSize;
87
124
  private final int sampleSize;
@@ -105,8 +142,10 @@ public class SamplingParserPlugin
105
142
  FileInput input, PageOutput output)
106
143
  {
107
144
  Buffer buffer = readSample(input, sampleSize);
108
- if (buffer.limit() < minSampleSize) {
109
- throw new NotEnoughSampleError();
145
+ if (!taskSource.get(boolean.class, "force", false)) {
146
+ if (buffer.limit() < minSampleSize) {
147
+ throw new NotEnoughSampleError(buffer.limit());
148
+ }
110
149
  }
111
150
  throw new SampledNoticeError(buffer);
112
151
  }
@@ -123,6 +123,11 @@ public class PageBuilder
123
123
 
124
124
  public void setString(int columnIndex, String value)
125
125
  {
126
+ if (value == null) {
127
+ setNull(columnIndex);
128
+ return;
129
+ }
130
+
126
131
  Integer reuseIndex = stringReferences.get(value);
127
132
  if (reuseIndex != null) {
128
133
  bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
@@ -143,6 +148,11 @@ public class PageBuilder
143
148
 
144
149
  public void setJson(int columnIndex, Value value)
145
150
  {
151
+ if (value == null) {
152
+ setNull(columnIndex);
153
+ return;
154
+ }
155
+
146
156
  int index = valueReferences.size();
147
157
  valueReferences.add(value.immutableValue());
148
158
  bufferSlice.setInt(getOffset(columnIndex), index);
@@ -158,6 +168,11 @@ public class PageBuilder
158
168
 
159
169
  public void setTimestamp(int columnIndex, Timestamp value)
160
170
  {
171
+ if (value == null) {
172
+ setNull(columnIndex);
173
+ return;
174
+ }
175
+
161
176
  int offset = getOffset(columnIndex);
162
177
  bufferSlice.setLong(offset, value.getEpochSecond());
163
178
  bufferSlice.setInt(offset + 8, value.getNano());
@@ -15,6 +15,8 @@ public class ResumableInputStream
15
15
  protected InputStream in;
16
16
  private long offset;
17
17
  private long markedOffset;
18
+ private Exception lastClosedCause;
19
+ private boolean closed;
18
20
 
19
21
  public ResumableInputStream(InputStream initialInputStream, Reopener reopener)
20
22
  {
@@ -22,6 +24,7 @@ public class ResumableInputStream
22
24
  this.in = initialInputStream;
23
25
  this.offset = 0L;
24
26
  this.markedOffset = 0L;
27
+ this.lastClosedCause = null;
25
28
  }
26
29
 
27
30
  public ResumableInputStream(Reopener reopener) throws IOException
@@ -32,6 +35,7 @@ public class ResumableInputStream
32
35
  private void reopen(Exception closedCause) throws IOException
33
36
  {
34
37
  if (in != null) {
38
+ lastClosedCause = closedCause;
35
39
  try {
36
40
  in.close();
37
41
  } catch (IOException ignored) {
@@ -39,11 +43,13 @@ public class ResumableInputStream
39
43
  in = null;
40
44
  }
41
45
  in = reopener.reopen(offset, closedCause);
46
+ lastClosedCause = null;
42
47
  }
43
48
 
44
49
  @Override
45
50
  public int read() throws IOException
46
51
  {
52
+ ensureOpened();
47
53
  while (true) {
48
54
  try {
49
55
  int v = in.read();
@@ -58,6 +64,7 @@ public class ResumableInputStream
58
64
  @Override
59
65
  public int read(byte[] b) throws IOException
60
66
  {
67
+ ensureOpened();
61
68
  while (true) {
62
69
  try {
63
70
  int r = in.read(b);
@@ -72,6 +79,7 @@ public class ResumableInputStream
72
79
  @Override
73
80
  public int read(byte[] b, int off, int len) throws IOException
74
81
  {
82
+ ensureOpened();
75
83
  while (true) {
76
84
  try {
77
85
  int r = in.read(b, off, len);
@@ -86,6 +94,7 @@ public class ResumableInputStream
86
94
  @Override
87
95
  public long skip(long n) throws IOException
88
96
  {
97
+ ensureOpened();
89
98
  while (true) {
90
99
  try {
91
100
  long r = in.skip(n);
@@ -100,18 +109,29 @@ public class ResumableInputStream
100
109
  @Override
101
110
  public int available() throws IOException
102
111
  {
112
+ ensureOpened();
103
113
  return in.available();
104
114
  }
105
115
 
106
116
  @Override
107
117
  public void close() throws IOException
108
118
  {
109
- in.close();
119
+ if (in != null) {
120
+ in.close();
121
+ closed = true;
122
+ in = null;
123
+ }
110
124
  }
111
125
 
112
126
  @Override
113
127
  public void mark(int readlimit)
114
128
  {
129
+ try {
130
+ ensureOpened();
131
+ }
132
+ catch (IOException ex) {
133
+ throw new RuntimeException(ex);
134
+ }
115
135
  in.mark(readlimit);
116
136
  markedOffset = offset;
117
137
  }
@@ -119,6 +139,7 @@ public class ResumableInputStream
119
139
  @Override
120
140
  public void reset() throws IOException
121
141
  {
142
+ ensureOpened();
122
143
  in.reset();
123
144
  offset = markedOffset;
124
145
  }
@@ -126,6 +147,22 @@ public class ResumableInputStream
126
147
  @Override
127
148
  public boolean markSupported()
128
149
  {
150
+ try {
151
+ ensureOpened();
152
+ }
153
+ catch (IOException ex) {
154
+ throw new RuntimeException(ex);
155
+ }
129
156
  return in.markSupported();
130
157
  }
158
+
159
+ private void ensureOpened() throws IOException
160
+ {
161
+ if (in == null) {
162
+ if (closed) {
163
+ throw new IOException("stream closed");
164
+ }
165
+ reopen(lastClosedCause);
166
+ }
167
+ }
131
168
  }