embulk 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +1 -1
- data/bin/embulk +1 -1
- data/build.gradle +99 -101
- data/embulk-core/build.gradle +37 -2
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +5 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.3.rst +34 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +39 -6
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +2 -0
- data/lib/embulk/command/embulk.rb +4 -0
- data/lib/embulk/command/embulk_example.rb +2 -2
- data/lib/embulk/command/embulk_run.rb +4 -1
- data/lib/embulk/data/new/README.md.erb +1 -1
- data/lib/embulk/guess/csv.rb +44 -0
- data/lib/embulk/guess/time_format_guess.rb +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +5 -5
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b24d1b7fd55459a318f9dbce2fff9b976f40738b
|
4
|
+
data.tar.gz: 88573c322b4d3e32f4fb11775122db1c1b28f3d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3179a8555d542ae38a785ad308a9b44560ae66889f263c932675e4ef5050b719636126ec0a960c6dba098030e269c3b0dd7b34f841d278857464c143ea0a2940
|
7
|
+
data.tar.gz: 20772970a71e622473f119ebc4ee9316fdbfff48e96948345fb6a001342d727a70ecd525e217fac98eb0838f2e20ccebcfd6d48603c945a0d9b096f7070ea92a
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
|
|
24
24
|
The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
|
25
25
|
|
26
26
|
```
|
27
|
-
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.2.jar -O embulk.jar
|
27
|
+
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.3.jar -O embulk.jar
|
28
28
|
java -jar embulk.jar --help
|
29
29
|
```
|
30
30
|
|
data/bin/embulk
CHANGED
data/build.gradle
CHANGED
@@ -4,19 +4,22 @@ plugins {
|
|
4
4
|
id 'com.github.ben-manes.versions' version '0.7'
|
5
5
|
id 'com.github.jruby-gradle.base' version '0.1.5'
|
6
6
|
id 'com.github.johnrengelman.shadow' version '1.2.0'
|
7
|
-
id 'java'
|
8
7
|
}
|
9
8
|
|
9
|
+
def java_projects = [project(":embulk-core"), project(":embulk-standards"), project(":embulk-cli")]
|
10
|
+
def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
11
|
+
|
10
12
|
allprojects {
|
11
13
|
group = 'org.embulk'
|
12
|
-
version = '0.4.2'
|
14
|
+
version = '0.4.3'
|
13
15
|
|
14
|
-
apply plugin: '
|
16
|
+
apply plugin: 'java'
|
15
17
|
apply plugin: 'maven-publish'
|
16
18
|
apply plugin: 'com.jfrog.bintray'
|
17
|
-
apply plugin: 'java'
|
18
19
|
|
19
|
-
//
|
20
|
+
//
|
21
|
+
// bintrayUpload task
|
22
|
+
//
|
20
23
|
bintray {
|
21
24
|
// write at your bintray user name and api key to ~/.gradle/gradle.properties file:
|
22
25
|
// bintray_user=frsyuki
|
@@ -24,7 +27,13 @@ allprojects {
|
|
24
27
|
user = project.hasProperty('bintray_user') ? bintray_user : ''
|
25
28
|
key = project.hasProperty('bintray_api_key') ? bintray_api_key : ''
|
26
29
|
|
27
|
-
publications = ['
|
30
|
+
publications = ['bintrayMavenRelease']
|
31
|
+
|
32
|
+
filesSpec {
|
33
|
+
// include embulk-<version>.jar built by 'cli' task
|
34
|
+
from "pkg/embulk-${project.version}.jar"
|
35
|
+
into "embulk-${project.version}.jar"
|
36
|
+
}
|
28
37
|
|
29
38
|
dryRun = false
|
30
39
|
publish = false // TODO automate uploading embulk.jar and make this true
|
@@ -56,54 +65,24 @@ allprojects {
|
|
56
65
|
}
|
57
66
|
|
58
67
|
subprojects {
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
sourceCompatibility = 1.7
|
69
|
-
targetCompatibility = 1.7
|
68
|
+
if (java_projects.contains(project)) {
|
69
|
+
apply plugin: 'maven'
|
70
|
+
apply plugin: 'findbugs'
|
71
|
+
apply plugin: 'jacoco'
|
72
|
+
|
73
|
+
repositories {
|
74
|
+
mavenCentral()
|
75
|
+
jcenter()
|
76
|
+
}
|
70
77
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
compile.exclude group: 'asm', module: 'asm'
|
75
|
-
compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
|
76
|
-
}
|
78
|
+
compileJava.options.encoding = 'UTF-8' // source encoding
|
79
|
+
sourceCompatibility = 1.7
|
80
|
+
targetCompatibility = 1.7
|
77
81
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
compile 'com.google.inject:guice:3.0'
|
82
|
-
compile 'com.google.inject.extensions:guice-multibindings:3.0'
|
83
|
-
compile 'javax.inject:javax.inject:1'
|
84
|
-
compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
|
85
|
-
compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
|
86
|
-
compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
|
87
|
-
compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
|
88
|
-
compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
|
89
|
-
compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
|
90
|
-
compile 'log4j:log4j:1.2.17'
|
91
|
-
compile 'org.slf4j:slf4j-api:1.7.10'
|
92
|
-
compile 'org.slf4j:slf4j-log4j12:1.7.10'
|
93
|
-
compile 'org.jruby:jruby-complete:1.7.19'
|
94
|
-
compile 'com.google.code.findbugs:annotations:3.0.0'
|
95
|
-
compile 'org.yaml:snakeyaml:1.14'
|
96
|
-
compile 'javax.validation:validation-api:1.1.0.Final'
|
97
|
-
compile 'org.apache.bval:bval-jsr303:0.5'
|
98
|
-
compile 'io.airlift:slice:0.9'
|
99
|
-
compile 'joda-time:joda-time:2.7'
|
100
|
-
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
101
|
-
compile 'com.ibm.icu:icu4j:54.1.1'
|
102
|
-
|
103
|
-
testCompile 'junit:junit:4.12'
|
104
|
-
}
|
82
|
+
dependencies {
|
83
|
+
testCompile 'junit:junit:4.12'
|
84
|
+
}
|
105
85
|
|
106
|
-
gradle.projectsEvaluated {
|
107
86
|
tasks.withType(JavaCompile) {
|
108
87
|
options.compilerArgs << "-Xlint:unchecked" //<< "-Xlint:deprecation"
|
109
88
|
}
|
@@ -113,39 +92,36 @@ subprojects {
|
|
113
92
|
html.enabled = true
|
114
93
|
}
|
115
94
|
}
|
116
|
-
}
|
117
95
|
|
118
|
-
|
119
|
-
|
120
|
-
|
96
|
+
findbugs {
|
97
|
+
ignoreFailures = true
|
98
|
+
}
|
121
99
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
100
|
+
javadoc {
|
101
|
+
options {
|
102
|
+
locale = 'en_US'
|
103
|
+
encoding = 'UTF-8'
|
104
|
+
}
|
126
105
|
}
|
127
|
-
}
|
128
106
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
107
|
+
// add javadoc/source jar tasks as artifacts to be released
|
108
|
+
task sourcesJar(type: Jar, dependsOn: classes) {
|
109
|
+
classifier = 'sources'
|
110
|
+
from sourceSets.main.allSource
|
111
|
+
}
|
112
|
+
task javadocJar(type: Jar, dependsOn: javadoc) {
|
113
|
+
classifier = 'javadoc'
|
114
|
+
from javadoc.destinationDir
|
115
|
+
}
|
116
|
+
artifacts {
|
117
|
+
archives sourcesJar, javadocJar
|
118
|
+
}
|
140
119
|
}
|
141
120
|
|
142
121
|
publishing {
|
143
122
|
publications {
|
144
|
-
if (
|
145
|
-
|
146
|
-
groupId project.group
|
147
|
-
artifactId project.name
|
148
|
-
version project.version
|
123
|
+
if (release_projects.contains(project)) {
|
124
|
+
bintrayMavenRelease(MavenPublication) {
|
149
125
|
from components.java
|
150
126
|
artifact sourcesJar
|
151
127
|
artifact javadocJar
|
@@ -155,6 +131,28 @@ subprojects {
|
|
155
131
|
}
|
156
132
|
}
|
157
133
|
|
134
|
+
//
|
135
|
+
// classpath task
|
136
|
+
//
|
137
|
+
task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
|
138
|
+
clean { delete 'classpath' }
|
139
|
+
|
140
|
+
//
|
141
|
+
// cli task
|
142
|
+
//
|
143
|
+
task cli(dependsOn: ':embulk-cli:shadowJar') << {
|
144
|
+
file('pkg').mkdirs()
|
145
|
+
File f = file("pkg/embulk-${project.version}.jar")
|
146
|
+
f.write('''\
|
147
|
+
#!/bin/sh
|
148
|
+
exec java -jar "$0" "$@"
|
149
|
+
exit 127
|
150
|
+
''')
|
151
|
+
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
152
|
+
f.setExecutable(true)
|
153
|
+
}
|
154
|
+
bintrayUpload.dependsOn(['cli'])
|
155
|
+
|
158
156
|
project(':embulk-cli') {
|
159
157
|
apply plugin: 'com.github.johnrengelman.shadow'
|
160
158
|
|
@@ -167,31 +165,19 @@ project(':embulk-cli') {
|
|
167
165
|
'Specification-Version': project.version,
|
168
166
|
'Main-Class': 'org.embulk.cli.Main'
|
169
167
|
}
|
170
|
-
append("${
|
168
|
+
append("${rootProject.projectDir}/COPYING")
|
171
169
|
}
|
172
170
|
|
173
171
|
task classpath(type: Copy) {
|
174
|
-
doFirst { file("${
|
172
|
+
doFirst { file("${rootProject.projectDir}/classpath").mkdirs() }
|
175
173
|
from configurations.runtime
|
176
|
-
into "${
|
174
|
+
into "${rootProject.projectDir}/classpath"
|
177
175
|
}
|
178
176
|
}
|
179
177
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
task cli(dependsOn: ':embulk-cli:shadowJar') << {
|
184
|
-
file('pkg').mkdirs()
|
185
|
-
File f = file("pkg/embulk-${project.version}.jar")
|
186
|
-
f.write('''\
|
187
|
-
#!/bin/sh
|
188
|
-
exec java -jar "$0" "$@"
|
189
|
-
exit 127
|
190
|
-
''')
|
191
|
-
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
192
|
-
f.setExecutable(true)
|
193
|
-
}
|
194
|
-
|
178
|
+
//
|
179
|
+
// gem task
|
180
|
+
//
|
195
181
|
import com.github.jrubygradle.JRubyExec
|
196
182
|
task gem(type: JRubyExec) {
|
197
183
|
jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'build'
|
@@ -201,6 +187,18 @@ task gem(type: JRubyExec) {
|
|
201
187
|
gem.dependsOn('gemspec')
|
202
188
|
gem.dependsOn('classpath')
|
203
189
|
|
190
|
+
//
|
191
|
+
// rubyGemsUpload task
|
192
|
+
//
|
193
|
+
task rubyGemsUpload(type: JRubyExec, dependsOn: ["gem"]) {
|
194
|
+
jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'push'
|
195
|
+
script "pkg/embulk-${project.version}.gem"
|
196
|
+
}
|
197
|
+
gem.dependsOn('gemspec')
|
198
|
+
|
199
|
+
//
|
200
|
+
// releaseCheck and release tasks
|
201
|
+
//
|
204
202
|
task releaseCheck << {
|
205
203
|
if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
|
206
204
|
throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
|
@@ -215,21 +213,21 @@ task releaseCheck << {
|
|
215
213
|
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
|
216
214
|
throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
|
217
215
|
}
|
218
|
-
|
216
|
+
// TODO check git-ls-files includes release-<version>.rst file
|
217
|
+
println "Ready. Run 'release' task."
|
219
218
|
}
|
220
219
|
|
221
|
-
task release(dependsOn: ["
|
220
|
+
task release(dependsOn: ["releaseCheck", "bintrayUpload", "rubyGemsUpload"]) << {
|
222
221
|
println """
|
223
|
-
|
222
|
+
Manual operations:
|
224
223
|
|
225
|
-
git commit -
|
224
|
+
git commit -am v${project.version}
|
226
225
|
git tag v${project.version}
|
227
|
-
gem push pkg/embulk-${project.version}.gem"
|
228
|
-
./gradlew bintrayUpload
|
229
|
-
open "https://bintray.com/embulk/maven/embulk/${project.version}/view" # and upload pkg/embulk-${project.version}.jar
|
230
226
|
|
231
227
|
"""
|
232
228
|
}
|
229
|
+
bintrayUpload.mustRunAfter('releaseCheck')
|
230
|
+
rubyGemsUpload.mustRunAfter('releaseCheck')
|
233
231
|
|
234
232
|
task gemspec << {
|
235
233
|
file('build').mkdirs()
|
data/embulk-core/build.gradle
CHANGED
@@ -1,6 +1,41 @@
|
|
1
|
+
// include ruby scripts
|
1
2
|
sourceSets {
|
2
3
|
main.resources {
|
3
|
-
srcDirs
|
4
|
-
srcDirs "${parent.projectDir}/lib"
|
4
|
+
srcDirs "${rootProject.projectDir}/lib"
|
5
5
|
}
|
6
6
|
}
|
7
|
+
|
8
|
+
configurations {
|
9
|
+
// com.google.inject:guice depends on asm and cglib but version of the libraries conflict
|
10
|
+
// with ones bundled in jruby-complete and cause bytecode compatibility error
|
11
|
+
compile.exclude group: 'asm', module: 'asm'
|
12
|
+
compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
|
13
|
+
}
|
14
|
+
|
15
|
+
// determine which dependencies have updates: $ gradle dependencyUpdates
|
16
|
+
dependencies {
|
17
|
+
compile 'com.google.guava:guava:18.0'
|
18
|
+
compile 'com.google.inject:guice:3.0'
|
19
|
+
compile 'com.google.inject.extensions:guice-multibindings:3.0'
|
20
|
+
compile 'javax.inject:javax.inject:1'
|
21
|
+
compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
|
22
|
+
compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
|
23
|
+
compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
|
24
|
+
compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
|
25
|
+
compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
|
26
|
+
compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
|
27
|
+
compile 'log4j:log4j:1.2.17'
|
28
|
+
compile 'org.slf4j:slf4j-api:1.7.10'
|
29
|
+
compile 'org.slf4j:slf4j-log4j12:1.7.10'
|
30
|
+
compile 'org.jruby:jruby-complete:1.7.19'
|
31
|
+
compile 'com.google.code.findbugs:annotations:3.0.0'
|
32
|
+
compile 'org.yaml:snakeyaml:1.14'
|
33
|
+
compile 'javax.validation:validation-api:1.1.0.Final'
|
34
|
+
compile 'org.apache.bval:bval-jsr303:0.5'
|
35
|
+
compile 'io.airlift:slice:0.9'
|
36
|
+
compile 'joda-time:joda-time:2.7'
|
37
|
+
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
38
|
+
|
39
|
+
// for embulk/guess/charset.rb
|
40
|
+
compile 'com.ibm.icu:icu4j:54.1.1'
|
41
|
+
}
|
@@ -169,7 +169,11 @@ public class Runner
|
|
169
169
|
|
170
170
|
String yml = writeNextConfig(options.getNextConfigOutputPath(), config, configDiff);
|
171
171
|
System.err.println(yml);
|
172
|
-
|
172
|
+
if (options.getNextConfigOutputPath() == null) {
|
173
|
+
System.out.println("Use -o PATH option to write the guessed config file to a file.");
|
174
|
+
} else {
|
175
|
+
System.out.println("Created '"+options.getNextConfigOutputPath()+"' file.");
|
176
|
+
}
|
173
177
|
}
|
174
178
|
|
175
179
|
private void checkFileWritable(String path)
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,34 @@
|
|
1
|
+
Release 0.4.3
|
2
|
+
==================================
|
3
|
+
|
4
|
+
CLI
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* All subcommands show current time with timezone and embulk's version number at the beginning.
|
8
|
+
|
9
|
+
Plugin API Changes
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* ``Thread.currentThread().getContextClassLoader()`` no longer returns JRuby's classloader. It returns null so that dependent libraries fall back to the appropriate ``this.getClass().getClassLoader()`` call.
|
13
|
+
|
14
|
+
Built-in plugins
|
15
|
+
------------------
|
16
|
+
|
17
|
+
* ``guess/csv`` guesses ``escape`` and ``null_string`` options.
|
18
|
+
* Fixed ``guess/csv`` failing when the CSV file includes a timestamp value with a timezone (@kinyuka++).
|
19
|
+
* Fixed memory leak at ``output/file`` (@akirakw++).
|
20
|
+
* Fixed ``input/file`` loading unnecessary files when it lists files from ``.``.
|
21
|
+
|
22
|
+
General Changes
|
23
|
+
------------------
|
24
|
+
|
25
|
+
* embulk-cli artifact is no longer released.
|
26
|
+
* embulk-standards artifact doesn't directly depend on dependencies of embulk-core.
|
27
|
+
* Updated the build script.
|
28
|
+
|
29
|
+
* ``bintrayUpload`` task uploads embulk-<version>.jar.
|
30
|
+
* ``release`` task actually releases gem to RubyGems and publishes jar files to Bintray.
|
31
|
+
|
32
|
+
Release Date
|
33
|
+
------------------
|
34
|
+
2015-02-17
|
@@ -51,6 +51,8 @@ public class LocalFileInputPlugin
|
|
51
51
|
|
52
52
|
private final Logger log = Exec.getLogger(getClass());
|
53
53
|
|
54
|
+
private final static Path CURRENT_DIR = Paths.get(".").normalize();
|
55
|
+
|
54
56
|
@Override
|
55
57
|
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
56
58
|
{
|
@@ -92,27 +94,58 @@ public class LocalFileInputPlugin
|
|
92
94
|
} else {
|
93
95
|
fileNamePrefix = pathPrefix.getFileName().toString();
|
94
96
|
Path d = pathPrefix.getParent();
|
95
|
-
directory = (d == null ?
|
97
|
+
directory = (d == null ? CURRENT_DIR : d);
|
96
98
|
}
|
97
99
|
|
98
100
|
final ImmutableList.Builder<String> builder = ImmutableList.builder();
|
99
101
|
final String lastPath = task.getLastPath().orNull();
|
100
102
|
try {
|
101
|
-
log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory, fileNamePrefix);
|
103
|
+
log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);
|
102
104
|
Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
|
103
105
|
@Override
|
104
|
-
public FileVisitResult
|
106
|
+
public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
|
107
|
+
{
|
108
|
+
if (path.equals(directory)) {
|
109
|
+
return FileVisitResult.CONTINUE;
|
110
|
+
} else if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
|
111
|
+
return FileVisitResult.SKIP_SUBTREE;
|
112
|
+
} else {
|
113
|
+
Path parent = path.getParent();
|
114
|
+
if (parent == null) {
|
115
|
+
parent = CURRENT_DIR;
|
116
|
+
}
|
117
|
+
if (parent.equals(directory)) {
|
118
|
+
if (path.getFileName().toString().startsWith(fileNamePrefix)) {
|
119
|
+
return FileVisitResult.CONTINUE;
|
120
|
+
} else {
|
121
|
+
return FileVisitResult.SKIP_SUBTREE;
|
122
|
+
}
|
123
|
+
} else {
|
124
|
+
return FileVisitResult.CONTINUE;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
@Override
|
130
|
+
public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
|
105
131
|
{
|
106
|
-
if (lastPath
|
107
|
-
|
132
|
+
if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
|
133
|
+
return FileVisitResult.CONTINUE;
|
134
|
+
} else {
|
135
|
+
Path parent = path.getParent();
|
136
|
+
if (parent == null) {
|
137
|
+
parent = CURRENT_DIR;
|
138
|
+
}
|
139
|
+
if (parent.equals(directory)) {
|
108
140
|
if (path.getFileName().toString().startsWith(fileNamePrefix)) {
|
109
141
|
builder.add(path.toString());
|
142
|
+
return FileVisitResult.CONTINUE;
|
110
143
|
}
|
111
144
|
} else {
|
112
145
|
builder.add(path.toString());
|
113
146
|
}
|
147
|
+
return FileVisitResult.CONTINUE;
|
114
148
|
}
|
115
|
-
return FileVisitResult.CONTINUE;
|
116
149
|
}
|
117
150
|
});
|
118
151
|
} catch (IOException ex) {
|
@@ -1,3 +1,7 @@
|
|
1
|
+
# reset context class loader set by org.jruby.Main.main to nil. embulk manages
|
2
|
+
# multiple classloaders. default classloader should be Plugin.class.getClassLoader().
|
3
|
+
java.lang.Thread.current_thread.set_context_class_loader(nil)
|
4
|
+
|
1
5
|
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
2
6
|
bundle_path = nil if bundle_path.empty?
|
3
7
|
|
@@ -13,8 +13,8 @@ module Embulk
|
|
13
13
|
id,account,time,purchase,comment
|
14
14
|
1,32864,2015-01-27 19:23:49,20150127,embulk
|
15
15
|
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
|
16
|
-
3,27559,2015-01-28 02:20:02,20150128,
|
17
|
-
4,11270,2015-01-29 11:54:36,20150129,
|
16
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
|
17
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL
|
18
18
|
EOF
|
19
19
|
end
|
20
20
|
|
@@ -17,8 +17,9 @@ module Embulk
|
|
17
17
|
# to make sure org.embulk.jruby.JRubyScriptingModule can require 'embulk/java/bootstrap'
|
18
18
|
$LOAD_PATH << Embulk.home('lib')
|
19
19
|
|
20
|
+
require 'embulk/version'
|
21
|
+
|
20
22
|
if argv.include?('--version')
|
21
|
-
require 'embulk/version'
|
22
23
|
puts "embulk #{Embulk::VERSION}"
|
23
24
|
exit 0
|
24
25
|
end
|
@@ -31,6 +32,8 @@ module Embulk
|
|
31
32
|
require 'optparse'
|
32
33
|
op = OptionParser.new
|
33
34
|
|
35
|
+
puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S %Z")}: Embulk v#{Embulk::VERSION}"
|
36
|
+
|
34
37
|
load_paths = []
|
35
38
|
classpaths = []
|
36
39
|
classpath_separator = java.io.File.pathSeparator
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -13,6 +13,17 @@ module Embulk
|
|
13
13
|
"\"", "'"
|
14
14
|
]
|
15
15
|
|
16
|
+
ESCAPE_CANDIDATES = [
|
17
|
+
"\\"
|
18
|
+
]
|
19
|
+
|
20
|
+
NULL_STRING_CANDIDATES = [
|
21
|
+
"null",
|
22
|
+
"NULL",
|
23
|
+
"#N/A",
|
24
|
+
"\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
|
25
|
+
]
|
26
|
+
|
16
27
|
# CsvParserPlugin.TRUE_STRINGS
|
17
28
|
TRUE_STRINGS = Hash[*%w[
|
18
29
|
true True TRUE
|
@@ -35,6 +46,13 @@ module Embulk
|
|
35
46
|
quote = guess_quote(sample_lines, delim)
|
36
47
|
parser_guessed["quote"] = quote ? quote : ''
|
37
48
|
|
49
|
+
escape = guess_escape(sample_lines, delim, quote)
|
50
|
+
parser_guessed["escape"] = escape ? escape : ''
|
51
|
+
|
52
|
+
null_string = guess_null_string(sample_lines, delim)
|
53
|
+
parser_guessed["null_string"] = null_string if null_string
|
54
|
+
# don't even set null_string to avoid confusion of null and 'null' in YAML format
|
55
|
+
|
38
56
|
sample_records = sample_lines.map {|line| line.split(delim) } # TODO use CsvTokenizer
|
39
57
|
first_types = guess_field_types(sample_records[0, 1])
|
40
58
|
other_types = guess_field_types(sample_records[1..-1])
|
@@ -119,6 +137,32 @@ module Embulk
|
|
119
137
|
end
|
120
138
|
end
|
121
139
|
|
140
|
+
def guess_escape(sample_lines, delim, optional_quote)
|
141
|
+
guessed = ESCAPE_CANDIDATES.map do |str|
|
142
|
+
if optional_quote
|
143
|
+
regexp = /#{Regexp.quote(str)}(?:#{Regexp.quote(delim)}|#{Regexp.quote(optional_quote)})/
|
144
|
+
else
|
145
|
+
regexp = /#{Regexp.quote(str)}#{Regexp.quote(delim)}/
|
146
|
+
end
|
147
|
+
counts = sample_lines.map {|line| line.scan(regexp).count }
|
148
|
+
count = counts.inject(0) {|r,c| r + c }
|
149
|
+
[str, count]
|
150
|
+
end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
|
151
|
+
found = guessed.first
|
152
|
+
return found ? found[0] : nil
|
153
|
+
end
|
154
|
+
|
155
|
+
def guess_null_string(sample_lines, delim)
|
156
|
+
guessed = NULL_STRING_CANDIDATES.map do |str|
|
157
|
+
regexp = /(?:^|#{Regexp.quote(delim)})#{Regexp.quote(str)}(?:$|#{Regexp.quote(delim)})/
|
158
|
+
counts = sample_lines.map {|line| line.scan(regexp).count }
|
159
|
+
count = counts.inject(0) {|r,c| r + c }
|
160
|
+
[str, count]
|
161
|
+
end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
|
162
|
+
found = guessed.first
|
163
|
+
return found ? found[0] : nil
|
164
|
+
end
|
165
|
+
|
122
166
|
def guess_field_types(field_lines)
|
123
167
|
column_lines = []
|
124
168
|
field_lines.each do |fields|
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -161,7 +161,6 @@ files:
|
|
161
161
|
- embulk-core/src/main/java/org/embulk/plugin/PluginSource.java
|
162
162
|
- embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java
|
163
163
|
- embulk-core/src/main/java/org/embulk/plugin/PluginType.java
|
164
|
-
- embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java
|
165
164
|
- embulk-core/src/main/java/org/embulk/spi/Buffer.java
|
166
165
|
- embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java
|
167
166
|
- embulk-core/src/main/java/org/embulk/spi/Column.java
|
@@ -268,6 +267,7 @@ files:
|
|
268
267
|
- embulk-docs/src/release/release-0.4.0.rst
|
269
268
|
- embulk-docs/src/release/release-0.4.1.rst
|
270
269
|
- embulk-docs/src/release/release-0.4.2.rst
|
270
|
+
- embulk-docs/src/release/release-0.4.3.rst
|
271
271
|
- embulk-standards/build.gradle
|
272
272
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
273
273
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -362,8 +362,8 @@ files:
|
|
362
362
|
- classpath/bval-jsr303-0.5.jar
|
363
363
|
- classpath/commons-beanutils-core-1.8.3.jar
|
364
364
|
- classpath/commons-lang3-3.1.jar
|
365
|
-
- classpath/embulk-core-0.4.2.jar
|
366
|
-
- classpath/embulk-standards-0.4.2.jar
|
365
|
+
- classpath/embulk-core-0.4.3.jar
|
366
|
+
- classpath/embulk-standards-0.4.3.jar
|
367
367
|
- classpath/guava-18.0.jar
|
368
368
|
- classpath/guice-3.0.jar
|
369
369
|
- classpath/guice-multibindings-3.0.jar
|
@@ -1,19 +0,0 @@
|
|
1
|
-
package org.embulk.plugin;
|
2
|
-
|
3
|
-
public class SetThreadContextClassLoader
|
4
|
-
implements AutoCloseable
|
5
|
-
{
|
6
|
-
private final ClassLoader original;
|
7
|
-
|
8
|
-
public SetThreadContextClassLoader(ClassLoader classLoader)
|
9
|
-
{
|
10
|
-
this.original = Thread.currentThread().getContextClassLoader();
|
11
|
-
Thread.currentThread().setContextClassLoader(classLoader);
|
12
|
-
}
|
13
|
-
|
14
|
-
@Override
|
15
|
-
public void close()
|
16
|
-
{
|
17
|
-
Thread.currentThread().setContextClassLoader(original);
|
18
|
-
}
|
19
|
-
}
|