embulk 0.8.15-java → 0.8.16-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/appveyor.yml +8 -0
- data/build.gradle +86 -45
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
- data/embulk-docs/src/built-in.rst +34 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.16.rst +43 -0
- data/embulk-standards/build.gradle +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +30 -21
- data/gradlew.bat +4 -10
- data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
- data/lib/embulk/data/new/java/build.gradle.erb +5 -3
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +30 -21
- data/lib/embulk/data/new/java/gradlew.bat +4 -10
- data/lib/embulk/guess/csv.rb +44 -22
- data/lib/embulk/guess/newline.rb +10 -4
- data/lib/embulk/guess_plugin.rb +3 -1
- data/lib/embulk/java/time_helper.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +92 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2bfd43d0575827d74caa260f2e2066353d59a00
|
4
|
+
data.tar.gz: 49dcc57de54eaa44d7f40acadc6f84726eeb950e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc1c844b5f3309a953dad100840df3d54716d97383e5c67216117d433c0dae460c29fb282f2c9a59e87c114a00c89d2a8bb1afe1e1e60e5b810178b406fd7ef5
|
7
|
+
data.tar.gz: e0e075694ca6cf82278650b6bb82162c368bb21e34e83ac37873f40a3d06bc40c964d8410bc57f5eb05ec2a3de1cbe93c508ac4ac16c6c2404f953b4a6d982f1
|
data/README.md
CHANGED
@@ -72,6 +72,7 @@ Embulk bundles some built-in plugins such as `embulk-encoder-gzip` or `embulk-fo
|
|
72
72
|
in:
|
73
73
|
type: file
|
74
74
|
path_prefix: "./try1/csv/sample_"
|
75
|
+
...
|
75
76
|
out:
|
76
77
|
type: command
|
77
78
|
command: "cat - > task.$INDEX.$SEQID.csv.gz"
|
@@ -199,7 +200,7 @@ bintray_api_key=(bintray api key)
|
|
199
200
|
Run following commands and follow its instruction:
|
200
201
|
|
201
202
|
```
|
202
|
-
./gradlew
|
203
|
+
./gradlew setVersion -Pto=$VERSION
|
203
204
|
```
|
204
205
|
|
205
206
|
```
|
data/appveyor.yml
CHANGED
@@ -3,7 +3,15 @@ version: "{branch} {build}"
|
|
3
3
|
build:
|
4
4
|
verbosity: detailed
|
5
5
|
|
6
|
+
install:
|
7
|
+
- ps: $zipPath = "$($env:USERPROFILE)\Handle.zip"
|
8
|
+
- ps: (New-Object Net.WebClient).DownloadFile('https://download.sysinternals.com/files/Handle.zip', $zipPath)
|
9
|
+
- ps: 7z x $zipPath -y -o"$env:APPVEYOR_BUILD_FOLDER" | Out-Null
|
10
|
+
|
11
|
+
# To avoid call-selfrun.bat FileNotFound exception, it installs and uses handle.exe as workaround provided by AppVeyor support.
|
12
|
+
# see http://help.appveyor.com/discussions/problems/5975-the-process-cannot-access-the-file-because-it-is-being-used-by-another-process
|
6
13
|
build_script:
|
14
|
+
- handle.exe -a -u C:\projects\embulk\embulk-cli\build\classes\test\org\embulk\cli\call-selfrun.bat -nobanner
|
7
15
|
- gradlew.bat --info --no-daemon check rubyTest
|
8
16
|
|
9
17
|
cache:
|
data/build.gradle
CHANGED
@@ -4,7 +4,7 @@ buildscript {
|
|
4
4
|
classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.3.1'
|
5
5
|
classpath "com.github.jruby-gradle:jruby-gradle-jar-plugin:1.0.1"
|
6
6
|
classpath 'com.github.ben-manes:gradle-versions-plugin:0.11.3'
|
7
|
-
classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.
|
7
|
+
classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.3'
|
8
8
|
}
|
9
9
|
}
|
10
10
|
apply plugin: "com.github.jruby-gradle.jar"
|
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards"), p
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.16'
|
20
20
|
|
21
21
|
ext {
|
22
22
|
jrubyVersion = '9.1.5.0'
|
@@ -151,7 +151,7 @@ subprojects {
|
|
151
151
|
}
|
152
152
|
|
153
153
|
task wrapper(type: Wrapper) {
|
154
|
-
gradleVersion = '2.
|
154
|
+
gradleVersion = '3.2.1'
|
155
155
|
}
|
156
156
|
|
157
157
|
import com.github.jrubygradle.JRubyExec
|
@@ -201,19 +201,22 @@ task updateResources(dependsOn: 'classpath') {
|
|
201
201
|
//
|
202
202
|
// classpath task
|
203
203
|
//
|
204
|
-
task classpath(dependsOn: ['build', ':embulk-cli:classpath'])
|
204
|
+
task classpath(dependsOn: ['build', ':embulk-cli:classpath']) { doLast {} }
|
205
|
+
|
205
206
|
clean { delete 'classpath' }
|
206
207
|
|
207
208
|
//
|
208
209
|
// cli task
|
209
210
|
//
|
210
|
-
task cli(dependsOn: ':embulk-cli:shadowJar')
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
211
|
+
task cli(dependsOn: ':embulk-cli:shadowJar') {
|
212
|
+
doLast {
|
213
|
+
file('pkg').mkdirs()
|
214
|
+
File f = file("pkg/embulk-${project.version}.jar")
|
215
|
+
f.write("")
|
216
|
+
f.append(file("embulk-cli/src/main/sh/selfrun.sh").readBytes())
|
217
|
+
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
218
|
+
f.setExecutable(true)
|
219
|
+
}
|
217
220
|
}
|
218
221
|
bintrayUpload.dependsOn(['cli'])
|
219
222
|
|
@@ -282,58 +285,96 @@ task rubyGemsUpload(type: Exec, dependsOn: ["gem", "rubyGemsUploadJRuby"]) {
|
|
282
285
|
//
|
283
286
|
// releaseCheck and release tasks
|
284
287
|
//
|
285
|
-
task releaseCheck
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
288
|
+
task releaseCheck {
|
289
|
+
doLast {
|
290
|
+
if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
|
291
|
+
throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
|
292
|
+
}
|
293
|
+
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains("${project.version}")) {
|
294
|
+
throw new GradleException("Release note for ${project.version} doesn't exist")
|
295
|
+
}
|
296
|
+
if (!file("embulk-docs/src/release.rst").getText().contains("release-${project.version}")) {
|
297
|
+
throw new GradleException("embulk-docs/src/release.rst doesn't include release-${project.version}")
|
298
|
+
}
|
299
|
+
String date = new Date().format("yyyy-MM-dd")
|
300
|
+
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
|
301
|
+
throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
|
302
|
+
}
|
303
|
+
// TODO check git-ls-files includes release-<version>.rst file
|
304
|
+
println "Ready. Run 'release' task."
|
298
305
|
}
|
299
|
-
// TODO check git-ls-files includes release-<version>.rst file
|
300
|
-
println "Ready. Run 'release' task."
|
301
306
|
}
|
302
307
|
|
303
|
-
task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"])
|
304
|
-
|
308
|
+
task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"]) {
|
309
|
+
doLast {
|
310
|
+
println """
|
305
311
|
Manual operations:
|
306
312
|
|
307
313
|
git commit -am v${project.version}
|
308
314
|
git tag v${project.version}
|
309
315
|
|
310
316
|
"""
|
317
|
+
}
|
311
318
|
}
|
312
319
|
bintrayUpload.mustRunAfter('releaseCheck')
|
313
320
|
rubyGemsUpload.mustRunAfter('releaseCheck')
|
314
321
|
rubyGemsUploadJRuby.mustRunAfter('releaseCheck')
|
315
322
|
|
316
|
-
task setVersion
|
317
|
-
|
318
|
-
|
319
|
-
|
323
|
+
task setVersion {
|
324
|
+
doLast {
|
325
|
+
if (!project.hasProperty("to")) {
|
326
|
+
throw new GradleException("Usage: ./gradlew setVersion -Pto=VERSION")
|
327
|
+
}
|
328
|
+
|
329
|
+
File gradle_ver = file('build.gradle')
|
330
|
+
gradle_ver.write(gradle_ver.getText().replaceFirst("version = '(\\d+)(\\.\\d+){2}'", "version = '${to}'"))
|
320
331
|
|
321
|
-
|
322
|
-
|
332
|
+
File ruby_ver = file('lib/embulk/version.rb')
|
333
|
+
ruby_ver.write(ruby_ver.getText().replaceFirst("VERSION = '(\\d+)(\\.\\d+){2}'", "VERSION = '${to}'"))
|
334
|
+
|
335
|
+
List<String> docs = [
|
336
|
+
'README.md',
|
337
|
+
]
|
338
|
+
docs.each() { path ->
|
339
|
+
File doc = file(path)
|
340
|
+
doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
|
341
|
+
}
|
323
342
|
|
324
|
-
|
325
|
-
|
343
|
+
file("embulk-docs/src/release/release-${to}.rst").append("")
|
344
|
+
"git add embulk-docs/src/release/release-${to}.rst".execute().waitFor()
|
326
345
|
|
327
|
-
|
328
|
-
'README.md',
|
329
|
-
]
|
330
|
-
docs.each() { path ->
|
331
|
-
File doc = file(path)
|
332
|
-
doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
|
346
|
+
println "add 'release/release-${to}' line to embulk-docs/src/release.rst"
|
333
347
|
}
|
348
|
+
}
|
349
|
+
|
350
|
+
task updateJRuby {
|
351
|
+
doLast {
|
352
|
+
if (!project.hasProperty("to")) {
|
353
|
+
throw new GradleException("Usage: ./gradlew updateJRuby -Pto=VERSION")
|
354
|
+
}
|
334
355
|
|
335
|
-
|
336
|
-
|
356
|
+
File gradle_ver = file('build.gradle')
|
357
|
+
gradle_ver.write(gradle_ver.getText().replaceFirst("jrubyVersion = '(\\d+)(\\.\\d+){3}'", "jrubyVersion = '${to}'"))
|
337
358
|
|
338
|
-
|
359
|
+
File gemspec_ver = file('embulk.gemspec')
|
360
|
+
gemspec_ver.write(gemspec_ver.getText().replaceFirst("gem.add_dependency \"jruby-jars\", '= (\\d+)(\\.\\d+){3}'", "gem.add_dependency \"jruby-jars\", '= ${to}'"))
|
361
|
+
|
362
|
+
File migrate_plugin_ver = file('lib/embulk/command/embulk_migrate_plugin.rb')
|
363
|
+
migrate_plugin_ver.write(migrate_plugin_ver.getText().replaceFirst("\".ruby-version\", \"jruby-(\\d+)(\\.\\d+){3}\"", "\".ruby-version\", \"jruby-${to}\""))
|
364
|
+
|
365
|
+
File gemfile_lock_ver = file('Gemfile.lock')
|
366
|
+
gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\(= (\\d+)(\\.\\d+){3}\\)", "jruby-jars (= ${to})"))
|
367
|
+
|
368
|
+
gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\((\\d+)(\\.\\d+){3}\\)", "jruby-jars (${to})"))
|
369
|
+
|
370
|
+
List<String> dot_ruby_vers = [
|
371
|
+
'lib/embulk/data/bundle/.ruby-version',
|
372
|
+
'lib/embulk/data/new/ruby/.ruby-version'
|
373
|
+
]
|
374
|
+
dot_ruby_vers.each() { path ->
|
375
|
+
File dot_ruby_ver = file(path)
|
376
|
+
dot_ruby_ver.write(dot_ruby_ver.getText().replaceAll('jruby-(\\d+)(\\.\\d+){3}', "jruby-${to}"))
|
377
|
+
}
|
378
|
+
|
379
|
+
}
|
339
380
|
}
|
@@ -37,6 +37,8 @@ public class SamplingParserPlugin
|
|
37
37
|
if (taskCount == 0) {
|
38
38
|
throw new NoSampleException("No input files to read sample data");
|
39
39
|
}
|
40
|
+
int maxSize = -1;
|
41
|
+
int maxSizeTaskIndex = -1;
|
40
42
|
for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
|
41
43
|
try {
|
42
44
|
runner.run(taskSource, schema, taskIndex, new PageOutput() {
|
@@ -51,10 +53,33 @@ public class SamplingParserPlugin
|
|
51
53
|
public void close() { }
|
52
54
|
});
|
53
55
|
} catch (NotEnoughSampleError ex) {
|
56
|
+
if (maxSize < ex.getSize()) {
|
57
|
+
maxSize = ex.getSize();
|
58
|
+
maxSizeTaskIndex = taskIndex;
|
59
|
+
}
|
54
60
|
continue;
|
55
61
|
}
|
56
62
|
}
|
57
|
-
|
63
|
+
if (maxSize <= 0) {
|
64
|
+
throw new NoSampleException("All input files are empty");
|
65
|
+
}
|
66
|
+
taskSource.getNested("ParserTaskSource").set("force", true);
|
67
|
+
try {
|
68
|
+
runner.run(taskSource, schema, maxSizeTaskIndex, new PageOutput() {
|
69
|
+
@Override
|
70
|
+
public void add(Page page)
|
71
|
+
{
|
72
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
73
|
+
}
|
74
|
+
|
75
|
+
public void finish() { }
|
76
|
+
|
77
|
+
public void close() { }
|
78
|
+
});
|
79
|
+
} catch (NotEnoughSampleError ex) {
|
80
|
+
throw new NoSampleException("All input files are smaller than minimum sampling size");
|
81
|
+
}
|
82
|
+
throw new NoSampleException("All input files are smaller than minimum sampling size");
|
58
83
|
}
|
59
84
|
});
|
60
85
|
throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
|
@@ -81,7 +106,19 @@ public class SamplingParserPlugin
|
|
81
106
|
|
82
107
|
public static class NotEnoughSampleError
|
83
108
|
extends Error
|
84
|
-
{
|
109
|
+
{
|
110
|
+
private final int size;
|
111
|
+
|
112
|
+
public NotEnoughSampleError(int size)
|
113
|
+
{
|
114
|
+
this.size = size;
|
115
|
+
}
|
116
|
+
|
117
|
+
public int getSize()
|
118
|
+
{
|
119
|
+
return size;
|
120
|
+
}
|
121
|
+
}
|
85
122
|
|
86
123
|
private final int minSampleSize;
|
87
124
|
private final int sampleSize;
|
@@ -105,8 +142,10 @@ public class SamplingParserPlugin
|
|
105
142
|
FileInput input, PageOutput output)
|
106
143
|
{
|
107
144
|
Buffer buffer = readSample(input, sampleSize);
|
108
|
-
if (
|
109
|
-
|
145
|
+
if (!taskSource.get(boolean.class, "force", false)) {
|
146
|
+
if (buffer.limit() < minSampleSize) {
|
147
|
+
throw new NotEnoughSampleError(buffer.limit());
|
148
|
+
}
|
110
149
|
}
|
111
150
|
throw new SampledNoticeError(buffer);
|
112
151
|
}
|
@@ -123,6 +123,11 @@ public class PageBuilder
|
|
123
123
|
|
124
124
|
public void setString(int columnIndex, String value)
|
125
125
|
{
|
126
|
+
if (value == null) {
|
127
|
+
setNull(columnIndex);
|
128
|
+
return;
|
129
|
+
}
|
130
|
+
|
126
131
|
Integer reuseIndex = stringReferences.get(value);
|
127
132
|
if (reuseIndex != null) {
|
128
133
|
bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
|
@@ -143,6 +148,11 @@ public class PageBuilder
|
|
143
148
|
|
144
149
|
public void setJson(int columnIndex, Value value)
|
145
150
|
{
|
151
|
+
if (value == null) {
|
152
|
+
setNull(columnIndex);
|
153
|
+
return;
|
154
|
+
}
|
155
|
+
|
146
156
|
int index = valueReferences.size();
|
147
157
|
valueReferences.add(value.immutableValue());
|
148
158
|
bufferSlice.setInt(getOffset(columnIndex), index);
|
@@ -158,6 +168,11 @@ public class PageBuilder
|
|
158
168
|
|
159
169
|
public void setTimestamp(int columnIndex, Timestamp value)
|
160
170
|
{
|
171
|
+
if (value == null) {
|
172
|
+
setNull(columnIndex);
|
173
|
+
return;
|
174
|
+
}
|
175
|
+
|
161
176
|
int offset = getOffset(columnIndex);
|
162
177
|
bufferSlice.setLong(offset, value.getEpochSecond());
|
163
178
|
bufferSlice.setInt(offset + 8, value.getNano());
|
@@ -15,6 +15,8 @@ public class ResumableInputStream
|
|
15
15
|
protected InputStream in;
|
16
16
|
private long offset;
|
17
17
|
private long markedOffset;
|
18
|
+
private Exception lastClosedCause;
|
19
|
+
private boolean closed;
|
18
20
|
|
19
21
|
public ResumableInputStream(InputStream initialInputStream, Reopener reopener)
|
20
22
|
{
|
@@ -22,6 +24,7 @@ public class ResumableInputStream
|
|
22
24
|
this.in = initialInputStream;
|
23
25
|
this.offset = 0L;
|
24
26
|
this.markedOffset = 0L;
|
27
|
+
this.lastClosedCause = null;
|
25
28
|
}
|
26
29
|
|
27
30
|
public ResumableInputStream(Reopener reopener) throws IOException
|
@@ -32,6 +35,7 @@ public class ResumableInputStream
|
|
32
35
|
private void reopen(Exception closedCause) throws IOException
|
33
36
|
{
|
34
37
|
if (in != null) {
|
38
|
+
lastClosedCause = closedCause;
|
35
39
|
try {
|
36
40
|
in.close();
|
37
41
|
} catch (IOException ignored) {
|
@@ -39,11 +43,13 @@ public class ResumableInputStream
|
|
39
43
|
in = null;
|
40
44
|
}
|
41
45
|
in = reopener.reopen(offset, closedCause);
|
46
|
+
lastClosedCause = null;
|
42
47
|
}
|
43
48
|
|
44
49
|
@Override
|
45
50
|
public int read() throws IOException
|
46
51
|
{
|
52
|
+
ensureOpened();
|
47
53
|
while (true) {
|
48
54
|
try {
|
49
55
|
int v = in.read();
|
@@ -58,6 +64,7 @@ public class ResumableInputStream
|
|
58
64
|
@Override
|
59
65
|
public int read(byte[] b) throws IOException
|
60
66
|
{
|
67
|
+
ensureOpened();
|
61
68
|
while (true) {
|
62
69
|
try {
|
63
70
|
int r = in.read(b);
|
@@ -72,6 +79,7 @@ public class ResumableInputStream
|
|
72
79
|
@Override
|
73
80
|
public int read(byte[] b, int off, int len) throws IOException
|
74
81
|
{
|
82
|
+
ensureOpened();
|
75
83
|
while (true) {
|
76
84
|
try {
|
77
85
|
int r = in.read(b, off, len);
|
@@ -86,6 +94,7 @@ public class ResumableInputStream
|
|
86
94
|
@Override
|
87
95
|
public long skip(long n) throws IOException
|
88
96
|
{
|
97
|
+
ensureOpened();
|
89
98
|
while (true) {
|
90
99
|
try {
|
91
100
|
long r = in.skip(n);
|
@@ -100,18 +109,29 @@ public class ResumableInputStream
|
|
100
109
|
@Override
|
101
110
|
public int available() throws IOException
|
102
111
|
{
|
112
|
+
ensureOpened();
|
103
113
|
return in.available();
|
104
114
|
}
|
105
115
|
|
106
116
|
@Override
|
107
117
|
public void close() throws IOException
|
108
118
|
{
|
109
|
-
in
|
119
|
+
if (in != null) {
|
120
|
+
in.close();
|
121
|
+
closed = true;
|
122
|
+
in = null;
|
123
|
+
}
|
110
124
|
}
|
111
125
|
|
112
126
|
@Override
|
113
127
|
public void mark(int readlimit)
|
114
128
|
{
|
129
|
+
try {
|
130
|
+
ensureOpened();
|
131
|
+
}
|
132
|
+
catch (IOException ex) {
|
133
|
+
throw new RuntimeException(ex);
|
134
|
+
}
|
115
135
|
in.mark(readlimit);
|
116
136
|
markedOffset = offset;
|
117
137
|
}
|
@@ -119,6 +139,7 @@ public class ResumableInputStream
|
|
119
139
|
@Override
|
120
140
|
public void reset() throws IOException
|
121
141
|
{
|
142
|
+
ensureOpened();
|
122
143
|
in.reset();
|
123
144
|
offset = markedOffset;
|
124
145
|
}
|
@@ -126,6 +147,22 @@ public class ResumableInputStream
|
|
126
147
|
@Override
|
127
148
|
public boolean markSupported()
|
128
149
|
{
|
150
|
+
try {
|
151
|
+
ensureOpened();
|
152
|
+
}
|
153
|
+
catch (IOException ex) {
|
154
|
+
throw new RuntimeException(ex);
|
155
|
+
}
|
129
156
|
return in.markSupported();
|
130
157
|
}
|
158
|
+
|
159
|
+
private void ensureOpened() throws IOException
|
160
|
+
{
|
161
|
+
if (in == null) {
|
162
|
+
if (closed) {
|
163
|
+
throw new IOException("stream closed");
|
164
|
+
}
|
165
|
+
reopen(lastClosedCause);
|
166
|
+
}
|
167
|
+
}
|
131
168
|
}
|