embulk 0.8.15 → 0.8.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/appveyor.yml +8 -0
- data/build.gradle +86 -45
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
- data/embulk-docs/src/built-in.rst +34 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.16.rst +43 -0
- data/embulk-standards/build.gradle +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +30 -21
- data/gradlew.bat +4 -10
- data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
- data/lib/embulk/data/new/java/build.gradle.erb +5 -3
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +30 -21
- data/lib/embulk/data/new/java/gradlew.bat +4 -10
- data/lib/embulk/guess/csv.rb +44 -22
- data/lib/embulk/guess/newline.rb +10 -4
- data/lib/embulk/guess_plugin.rb +3 -1
- data/lib/embulk/java/time_helper.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +92 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8343679bf498ccc8d1519185737a9c54ced0d669
|
4
|
+
data.tar.gz: 3b737c7e3886c6915f503bd2960efb993ac3c4e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e11e26e3e9025d669a9cb9761d883e9470bd67e52a4019aa6d2b08b6ae2c44822fdea0be435862ebce21c7267ba2e08293e3e03441297e063b220a9fe9a29a6
|
7
|
+
data.tar.gz: 31b5b9f4f008951efbec4b84e75a823c4832d2ec5bd495bb4024c97687160008566aed4dbccfe25638da18995e3b46da4e885cd942e2e29e02709c672b56b694
|
data/README.md
CHANGED
@@ -72,6 +72,7 @@ Embulk bundles some built-in plugins such as `embulk-encoder-gzip` or `embulk-fo
|
|
72
72
|
in:
|
73
73
|
type: file
|
74
74
|
path_prefix: "./try1/csv/sample_"
|
75
|
+
...
|
75
76
|
out:
|
76
77
|
type: command
|
77
78
|
command: "cat - > task.$INDEX.$SEQID.csv.gz"
|
@@ -199,7 +200,7 @@ bintray_api_key=(bintray api key)
|
|
199
200
|
Run following commands and follow its instruction:
|
200
201
|
|
201
202
|
```
|
202
|
-
./gradlew
|
203
|
+
./gradlew setVersion -Pto=$VERSION
|
203
204
|
```
|
204
205
|
|
205
206
|
```
|
data/appveyor.yml
CHANGED
@@ -3,7 +3,15 @@ version: "{branch} {build}"
|
|
3
3
|
build:
|
4
4
|
verbosity: detailed
|
5
5
|
|
6
|
+
install:
|
7
|
+
- ps: $zipPath = "$($env:USERPROFILE)\Handle.zip"
|
8
|
+
- ps: (New-Object Net.WebClient).DownloadFile('https://download.sysinternals.com/files/Handle.zip', $zipPath)
|
9
|
+
- ps: 7z x $zipPath -y -o"$env:APPVEYOR_BUILD_FOLDER" | Out-Null
|
10
|
+
|
11
|
+
# To avoid call-selfrun.bat FileNotFound exception, it installs and uses handle.exe as workaround provided by AppVeyor support.
|
12
|
+
# see http://help.appveyor.com/discussions/problems/5975-the-process-cannot-access-the-file-because-it-is-being-used-by-another-process
|
6
13
|
build_script:
|
14
|
+
- handle.exe -a -u C:\projects\embulk\embulk-cli\build\classes\test\org\embulk\cli\call-selfrun.bat -nobanner
|
7
15
|
- gradlew.bat --info --no-daemon check rubyTest
|
8
16
|
|
9
17
|
cache:
|
data/build.gradle
CHANGED
@@ -4,7 +4,7 @@ buildscript {
|
|
4
4
|
classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.3.1'
|
5
5
|
classpath "com.github.jruby-gradle:jruby-gradle-jar-plugin:1.0.1"
|
6
6
|
classpath 'com.github.ben-manes:gradle-versions-plugin:0.11.3'
|
7
|
-
classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.
|
7
|
+
classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.3'
|
8
8
|
}
|
9
9
|
}
|
10
10
|
apply plugin: "com.github.jruby-gradle.jar"
|
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards"), p
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.15'
|
19
|
+
version = '0.8.16'
|
20
20
|
|
21
21
|
ext {
|
22
22
|
jrubyVersion = '9.1.5.0'
|
@@ -151,7 +151,7 @@ subprojects {
|
|
151
151
|
}
|
152
152
|
|
153
153
|
task wrapper(type: Wrapper) {
|
154
|
-
gradleVersion = '2.
|
154
|
+
gradleVersion = '3.2.1'
|
155
155
|
}
|
156
156
|
|
157
157
|
import com.github.jrubygradle.JRubyExec
|
@@ -201,19 +201,22 @@ task updateResources(dependsOn: 'classpath') {
|
|
201
201
|
//
|
202
202
|
// classpath task
|
203
203
|
//
|
204
|
-
task classpath(dependsOn: ['build', ':embulk-cli:classpath'])
|
204
|
+
task classpath(dependsOn: ['build', ':embulk-cli:classpath']) { doLast {} }
|
205
|
+
|
205
206
|
clean { delete 'classpath' }
|
206
207
|
|
207
208
|
//
|
208
209
|
// cli task
|
209
210
|
//
|
210
|
-
task cli(dependsOn: ':embulk-cli:shadowJar')
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
211
|
+
task cli(dependsOn: ':embulk-cli:shadowJar') {
|
212
|
+
doLast {
|
213
|
+
file('pkg').mkdirs()
|
214
|
+
File f = file("pkg/embulk-${project.version}.jar")
|
215
|
+
f.write("")
|
216
|
+
f.append(file("embulk-cli/src/main/sh/selfrun.sh").readBytes())
|
217
|
+
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
218
|
+
f.setExecutable(true)
|
219
|
+
}
|
217
220
|
}
|
218
221
|
bintrayUpload.dependsOn(['cli'])
|
219
222
|
|
@@ -282,58 +285,96 @@ task rubyGemsUpload(type: Exec, dependsOn: ["gem", "rubyGemsUploadJRuby"]) {
|
|
282
285
|
//
|
283
286
|
// releaseCheck and release tasks
|
284
287
|
//
|
285
|
-
task releaseCheck
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
288
|
+
task releaseCheck {
|
289
|
+
doLast {
|
290
|
+
if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
|
291
|
+
throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
|
292
|
+
}
|
293
|
+
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains("${project.version}")) {
|
294
|
+
throw new GradleException("Release note for ${project.version} doesn't exist")
|
295
|
+
}
|
296
|
+
if (!file("embulk-docs/src/release.rst").getText().contains("release-${project.version}")) {
|
297
|
+
throw new GradleException("embulk-docs/src/release.rst doesn't include release-${project.version}")
|
298
|
+
}
|
299
|
+
String date = new Date().format("yyyy-MM-dd")
|
300
|
+
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
|
301
|
+
throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
|
302
|
+
}
|
303
|
+
// TODO check git-ls-files includes release-<version>.rst file
|
304
|
+
println "Ready. Run 'release' task."
|
298
305
|
}
|
299
|
-
// TODO check git-ls-files includes release-<version>.rst file
|
300
|
-
println "Ready. Run 'release' task."
|
301
306
|
}
|
302
307
|
|
303
|
-
task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"])
|
304
|
-
|
308
|
+
task release(dependsOn: ["cli", "releaseCheck", "bintrayUpload", "rubyGemsUpload"]) {
|
309
|
+
doLast {
|
310
|
+
println """
|
305
311
|
Manual operations:
|
306
312
|
|
307
313
|
git commit -am v${project.version}
|
308
314
|
git tag v${project.version}
|
309
315
|
|
310
316
|
"""
|
317
|
+
}
|
311
318
|
}
|
312
319
|
bintrayUpload.mustRunAfter('releaseCheck')
|
313
320
|
rubyGemsUpload.mustRunAfter('releaseCheck')
|
314
321
|
rubyGemsUploadJRuby.mustRunAfter('releaseCheck')
|
315
322
|
|
316
|
-
task setVersion
|
317
|
-
|
318
|
-
|
319
|
-
|
323
|
+
task setVersion {
|
324
|
+
doLast {
|
325
|
+
if (!project.hasProperty("to")) {
|
326
|
+
throw new GradleException("Usage: ./gradlew setVersion -Pto=VERSION")
|
327
|
+
}
|
328
|
+
|
329
|
+
File gradle_ver = file('build.gradle')
|
330
|
+
gradle_ver.write(gradle_ver.getText().replaceFirst("version = '(\\d+)(\\.\\d+){2}'", "version = '${to}'"))
|
320
331
|
|
321
|
-
|
322
|
-
|
332
|
+
File ruby_ver = file('lib/embulk/version.rb')
|
333
|
+
ruby_ver.write(ruby_ver.getText().replaceFirst("VERSION = '(\\d+)(\\.\\d+){2}'", "VERSION = '${to}'"))
|
334
|
+
|
335
|
+
List<String> docs = [
|
336
|
+
'README.md',
|
337
|
+
]
|
338
|
+
docs.each() { path ->
|
339
|
+
File doc = file(path)
|
340
|
+
doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
|
341
|
+
}
|
323
342
|
|
324
|
-
|
325
|
-
|
343
|
+
file("embulk-docs/src/release/release-${to}.rst").append("")
|
344
|
+
"git add embulk-docs/src/release/release-${to}.rst".execute().waitFor()
|
326
345
|
|
327
|
-
|
328
|
-
'README.md',
|
329
|
-
]
|
330
|
-
docs.each() { path ->
|
331
|
-
File doc = file(path)
|
332
|
-
doc.write(doc.getText().replaceAll('embulk-(\\d+)(\\.\\d+){2}', "embulk-${to}"))
|
346
|
+
println "add 'release/release-${to}' line to embulk-docs/src/release.rst"
|
333
347
|
}
|
348
|
+
}
|
349
|
+
|
350
|
+
task updateJRuby {
|
351
|
+
doLast {
|
352
|
+
if (!project.hasProperty("to")) {
|
353
|
+
throw new GradleException("Usage: ./gradlew updateJRuby -Pto=VERSION")
|
354
|
+
}
|
334
355
|
|
335
|
-
|
336
|
-
|
356
|
+
File gradle_ver = file('build.gradle')
|
357
|
+
gradle_ver.write(gradle_ver.getText().replaceFirst("jrubyVersion = '(\\d+)(\\.\\d+){3}'", "jrubyVersion = '${to}'"))
|
337
358
|
|
338
|
-
|
359
|
+
File gemspec_ver = file('embulk.gemspec')
|
360
|
+
gemspec_ver.write(gemspec_ver.getText().replaceFirst("gem.add_dependency \"jruby-jars\", '= (\\d+)(\\.\\d+){3}'", "gem.add_dependency \"jruby-jars\", '= ${to}'"))
|
361
|
+
|
362
|
+
File migrate_plugin_ver = file('lib/embulk/command/embulk_migrate_plugin.rb')
|
363
|
+
migrate_plugin_ver.write(migrate_plugin_ver.getText().replaceFirst("\".ruby-version\", \"jruby-(\\d+)(\\.\\d+){3}\"", "\".ruby-version\", \"jruby-${to}\""))
|
364
|
+
|
365
|
+
File gemfile_lock_ver = file('Gemfile.lock')
|
366
|
+
gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\(= (\\d+)(\\.\\d+){3}\\)", "jruby-jars (= ${to})"))
|
367
|
+
|
368
|
+
gemfile_lock_ver.write(gemfile_lock_ver.getText().replaceFirst("jruby-jars \\((\\d+)(\\.\\d+){3}\\)", "jruby-jars (${to})"))
|
369
|
+
|
370
|
+
List<String> dot_ruby_vers = [
|
371
|
+
'lib/embulk/data/bundle/.ruby-version',
|
372
|
+
'lib/embulk/data/new/ruby/.ruby-version'
|
373
|
+
]
|
374
|
+
dot_ruby_vers.each() { path ->
|
375
|
+
File dot_ruby_ver = file(path)
|
376
|
+
dot_ruby_ver.write(dot_ruby_ver.getText().replaceAll('jruby-(\\d+)(\\.\\d+){3}', "jruby-${to}"))
|
377
|
+
}
|
378
|
+
|
379
|
+
}
|
339
380
|
}
|
@@ -37,6 +37,8 @@ public class SamplingParserPlugin
|
|
37
37
|
if (taskCount == 0) {
|
38
38
|
throw new NoSampleException("No input files to read sample data");
|
39
39
|
}
|
40
|
+
int maxSize = -1;
|
41
|
+
int maxSizeTaskIndex = -1;
|
40
42
|
for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
|
41
43
|
try {
|
42
44
|
runner.run(taskSource, schema, taskIndex, new PageOutput() {
|
@@ -51,10 +53,33 @@ public class SamplingParserPlugin
|
|
51
53
|
public void close() { }
|
52
54
|
});
|
53
55
|
} catch (NotEnoughSampleError ex) {
|
56
|
+
if (maxSize < ex.getSize()) {
|
57
|
+
maxSize = ex.getSize();
|
58
|
+
maxSizeTaskIndex = taskIndex;
|
59
|
+
}
|
54
60
|
continue;
|
55
61
|
}
|
56
62
|
}
|
57
|
-
|
63
|
+
if (maxSize <= 0) {
|
64
|
+
throw new NoSampleException("All input files are empty");
|
65
|
+
}
|
66
|
+
taskSource.getNested("ParserTaskSource").set("force", true);
|
67
|
+
try {
|
68
|
+
runner.run(taskSource, schema, maxSizeTaskIndex, new PageOutput() {
|
69
|
+
@Override
|
70
|
+
public void add(Page page)
|
71
|
+
{
|
72
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
73
|
+
}
|
74
|
+
|
75
|
+
public void finish() { }
|
76
|
+
|
77
|
+
public void close() { }
|
78
|
+
});
|
79
|
+
} catch (NotEnoughSampleError ex) {
|
80
|
+
throw new NoSampleException("All input files are smaller than minimum sampling size");
|
81
|
+
}
|
82
|
+
throw new NoSampleException("All input files are smaller than minimum sampling size");
|
58
83
|
}
|
59
84
|
});
|
60
85
|
throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
|
@@ -81,7 +106,19 @@ public class SamplingParserPlugin
|
|
81
106
|
|
82
107
|
public static class NotEnoughSampleError
|
83
108
|
extends Error
|
84
|
-
{
|
109
|
+
{
|
110
|
+
private final int size;
|
111
|
+
|
112
|
+
public NotEnoughSampleError(int size)
|
113
|
+
{
|
114
|
+
this.size = size;
|
115
|
+
}
|
116
|
+
|
117
|
+
public int getSize()
|
118
|
+
{
|
119
|
+
return size;
|
120
|
+
}
|
121
|
+
}
|
85
122
|
|
86
123
|
private final int minSampleSize;
|
87
124
|
private final int sampleSize;
|
@@ -105,8 +142,10 @@ public class SamplingParserPlugin
|
|
105
142
|
FileInput input, PageOutput output)
|
106
143
|
{
|
107
144
|
Buffer buffer = readSample(input, sampleSize);
|
108
|
-
if (
|
109
|
-
|
145
|
+
if (!taskSource.get(boolean.class, "force", false)) {
|
146
|
+
if (buffer.limit() < minSampleSize) {
|
147
|
+
throw new NotEnoughSampleError(buffer.limit());
|
148
|
+
}
|
110
149
|
}
|
111
150
|
throw new SampledNoticeError(buffer);
|
112
151
|
}
|
@@ -123,6 +123,11 @@ public class PageBuilder
|
|
123
123
|
|
124
124
|
public void setString(int columnIndex, String value)
|
125
125
|
{
|
126
|
+
if (value == null) {
|
127
|
+
setNull(columnIndex);
|
128
|
+
return;
|
129
|
+
}
|
130
|
+
|
126
131
|
Integer reuseIndex = stringReferences.get(value);
|
127
132
|
if (reuseIndex != null) {
|
128
133
|
bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
|
@@ -143,6 +148,11 @@ public class PageBuilder
|
|
143
148
|
|
144
149
|
public void setJson(int columnIndex, Value value)
|
145
150
|
{
|
151
|
+
if (value == null) {
|
152
|
+
setNull(columnIndex);
|
153
|
+
return;
|
154
|
+
}
|
155
|
+
|
146
156
|
int index = valueReferences.size();
|
147
157
|
valueReferences.add(value.immutableValue());
|
148
158
|
bufferSlice.setInt(getOffset(columnIndex), index);
|
@@ -158,6 +168,11 @@ public class PageBuilder
|
|
158
168
|
|
159
169
|
public void setTimestamp(int columnIndex, Timestamp value)
|
160
170
|
{
|
171
|
+
if (value == null) {
|
172
|
+
setNull(columnIndex);
|
173
|
+
return;
|
174
|
+
}
|
175
|
+
|
161
176
|
int offset = getOffset(columnIndex);
|
162
177
|
bufferSlice.setLong(offset, value.getEpochSecond());
|
163
178
|
bufferSlice.setInt(offset + 8, value.getNano());
|
@@ -15,6 +15,8 @@ public class ResumableInputStream
|
|
15
15
|
protected InputStream in;
|
16
16
|
private long offset;
|
17
17
|
private long markedOffset;
|
18
|
+
private Exception lastClosedCause;
|
19
|
+
private boolean closed;
|
18
20
|
|
19
21
|
public ResumableInputStream(InputStream initialInputStream, Reopener reopener)
|
20
22
|
{
|
@@ -22,6 +24,7 @@ public class ResumableInputStream
|
|
22
24
|
this.in = initialInputStream;
|
23
25
|
this.offset = 0L;
|
24
26
|
this.markedOffset = 0L;
|
27
|
+
this.lastClosedCause = null;
|
25
28
|
}
|
26
29
|
|
27
30
|
public ResumableInputStream(Reopener reopener) throws IOException
|
@@ -32,6 +35,7 @@ public class ResumableInputStream
|
|
32
35
|
private void reopen(Exception closedCause) throws IOException
|
33
36
|
{
|
34
37
|
if (in != null) {
|
38
|
+
lastClosedCause = closedCause;
|
35
39
|
try {
|
36
40
|
in.close();
|
37
41
|
} catch (IOException ignored) {
|
@@ -39,11 +43,13 @@ public class ResumableInputStream
|
|
39
43
|
in = null;
|
40
44
|
}
|
41
45
|
in = reopener.reopen(offset, closedCause);
|
46
|
+
lastClosedCause = null;
|
42
47
|
}
|
43
48
|
|
44
49
|
@Override
|
45
50
|
public int read() throws IOException
|
46
51
|
{
|
52
|
+
ensureOpened();
|
47
53
|
while (true) {
|
48
54
|
try {
|
49
55
|
int v = in.read();
|
@@ -58,6 +64,7 @@ public class ResumableInputStream
|
|
58
64
|
@Override
|
59
65
|
public int read(byte[] b) throws IOException
|
60
66
|
{
|
67
|
+
ensureOpened();
|
61
68
|
while (true) {
|
62
69
|
try {
|
63
70
|
int r = in.read(b);
|
@@ -72,6 +79,7 @@ public class ResumableInputStream
|
|
72
79
|
@Override
|
73
80
|
public int read(byte[] b, int off, int len) throws IOException
|
74
81
|
{
|
82
|
+
ensureOpened();
|
75
83
|
while (true) {
|
76
84
|
try {
|
77
85
|
int r = in.read(b, off, len);
|
@@ -86,6 +94,7 @@ public class ResumableInputStream
|
|
86
94
|
@Override
|
87
95
|
public long skip(long n) throws IOException
|
88
96
|
{
|
97
|
+
ensureOpened();
|
89
98
|
while (true) {
|
90
99
|
try {
|
91
100
|
long r = in.skip(n);
|
@@ -100,18 +109,29 @@ public class ResumableInputStream
|
|
100
109
|
@Override
|
101
110
|
public int available() throws IOException
|
102
111
|
{
|
112
|
+
ensureOpened();
|
103
113
|
return in.available();
|
104
114
|
}
|
105
115
|
|
106
116
|
@Override
|
107
117
|
public void close() throws IOException
|
108
118
|
{
|
109
|
-
in
|
119
|
+
if (in != null) {
|
120
|
+
in.close();
|
121
|
+
closed = true;
|
122
|
+
in = null;
|
123
|
+
}
|
110
124
|
}
|
111
125
|
|
112
126
|
@Override
|
113
127
|
public void mark(int readlimit)
|
114
128
|
{
|
129
|
+
try {
|
130
|
+
ensureOpened();
|
131
|
+
}
|
132
|
+
catch (IOException ex) {
|
133
|
+
throw new RuntimeException(ex);
|
134
|
+
}
|
115
135
|
in.mark(readlimit);
|
116
136
|
markedOffset = offset;
|
117
137
|
}
|
@@ -119,6 +139,7 @@ public class ResumableInputStream
|
|
119
139
|
@Override
|
120
140
|
public void reset() throws IOException
|
121
141
|
{
|
142
|
+
ensureOpened();
|
122
143
|
in.reset();
|
123
144
|
offset = markedOffset;
|
124
145
|
}
|
@@ -126,6 +147,22 @@ public class ResumableInputStream
|
|
126
147
|
@Override
|
127
148
|
public boolean markSupported()
|
128
149
|
{
|
150
|
+
try {
|
151
|
+
ensureOpened();
|
152
|
+
}
|
153
|
+
catch (IOException ex) {
|
154
|
+
throw new RuntimeException(ex);
|
155
|
+
}
|
129
156
|
return in.markSupported();
|
130
157
|
}
|
158
|
+
|
159
|
+
private void ensureOpened() throws IOException
|
160
|
+
{
|
161
|
+
if (in == null) {
|
162
|
+
if (closed) {
|
163
|
+
throw new IOException("stream closed");
|
164
|
+
}
|
165
|
+
reopen(lastClosedCause);
|
166
|
+
}
|
167
|
+
}
|
131
168
|
}
|