embulk 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +1 -1
- data/bin/embulk +1 -1
- data/build.gradle +99 -101
- data/embulk-core/build.gradle +37 -2
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +5 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.3.rst +34 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +39 -6
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +2 -0
- data/lib/embulk/command/embulk.rb +4 -0
- data/lib/embulk/command/embulk_example.rb +2 -2
- data/lib/embulk/command/embulk_run.rb +4 -1
- data/lib/embulk/data/new/README.md.erb +1 -1
- data/lib/embulk/guess/csv.rb +44 -0
- data/lib/embulk/guess/time_format_guess.rb +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +5 -5
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b24d1b7fd55459a318f9dbce2fff9b976f40738b
|
4
|
+
data.tar.gz: 88573c322b4d3e32f4fb11775122db1c1b28f3d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3179a8555d542ae38a785ad308a9b44560ae66889f263c932675e4ef5050b719636126ec0a960c6dba098030e269c3b0dd7b34f841d278857464c143ea0a2940
|
7
|
+
data.tar.gz: 20772970a71e622473f119ebc4ee9316fdbfff48e96948345fb6a001342d727a70ecd525e217fac98eb0838f2e20ccebcfd6d48603c945a0d9b096f7070ea92a
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
|
|
24
24
|
The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
|
25
25
|
|
26
26
|
```
|
27
|
-
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.2.jar -O embulk.jar
|
27
|
+
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.3.jar -O embulk.jar
|
28
28
|
java -jar embulk.jar --help
|
29
29
|
```
|
30
30
|
|
data/bin/embulk
CHANGED
data/build.gradle
CHANGED
@@ -4,19 +4,22 @@ plugins {
|
|
4
4
|
id 'com.github.ben-manes.versions' version '0.7'
|
5
5
|
id 'com.github.jruby-gradle.base' version '0.1.5'
|
6
6
|
id 'com.github.johnrengelman.shadow' version '1.2.0'
|
7
|
-
id 'java'
|
8
7
|
}
|
9
8
|
|
9
|
+
def java_projects = [project(":embulk-core"), project(":embulk-standards"), project(":embulk-cli")]
|
10
|
+
def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
11
|
+
|
10
12
|
allprojects {
|
11
13
|
group = 'org.embulk'
|
12
|
-
version = '0.4.2'
|
14
|
+
version = '0.4.3'
|
13
15
|
|
14
|
-
apply plugin: '
|
16
|
+
apply plugin: 'java'
|
15
17
|
apply plugin: 'maven-publish'
|
16
18
|
apply plugin: 'com.jfrog.bintray'
|
17
|
-
apply plugin: 'java'
|
18
19
|
|
19
|
-
//
|
20
|
+
//
|
21
|
+
// bintrayUpload task
|
22
|
+
//
|
20
23
|
bintray {
|
21
24
|
// write your bintray user name and api key to the ~/.gradle/gradle.properties file:
|
22
25
|
// bintray_user=frsyuki
|
@@ -24,7 +27,13 @@ allprojects {
|
|
24
27
|
user = project.hasProperty('bintray_user') ? bintray_user : ''
|
25
28
|
key = project.hasProperty('bintray_api_key') ? bintray_api_key : ''
|
26
29
|
|
27
|
-
publications = ['
|
30
|
+
publications = ['bintrayMavenRelease']
|
31
|
+
|
32
|
+
filesSpec {
|
33
|
+
// include embulk-<version>.jar built by 'cli' task
|
34
|
+
from "pkg/embulk-${project.version}.jar"
|
35
|
+
into "embulk-${project.version}.jar"
|
36
|
+
}
|
28
37
|
|
29
38
|
dryRun = false
|
30
39
|
publish = false // TODO automate uploading embulk.jar and make this true
|
@@ -56,54 +65,24 @@ allprojects {
|
|
56
65
|
}
|
57
66
|
|
58
67
|
subprojects {
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
sourceCompatibility = 1.7
|
69
|
-
targetCompatibility = 1.7
|
68
|
+
if (java_projects.contains(project)) {
|
69
|
+
apply plugin: 'maven'
|
70
|
+
apply plugin: 'findbugs'
|
71
|
+
apply plugin: 'jacoco'
|
72
|
+
|
73
|
+
repositories {
|
74
|
+
mavenCentral()
|
75
|
+
jcenter()
|
76
|
+
}
|
70
77
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
compile.exclude group: 'asm', module: 'asm'
|
75
|
-
compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
|
76
|
-
}
|
78
|
+
compileJava.options.encoding = 'UTF-8' // source encoding
|
79
|
+
sourceCompatibility = 1.7
|
80
|
+
targetCompatibility = 1.7
|
77
81
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
compile 'com.google.inject:guice:3.0'
|
82
|
-
compile 'com.google.inject.extensions:guice-multibindings:3.0'
|
83
|
-
compile 'javax.inject:javax.inject:1'
|
84
|
-
compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
|
85
|
-
compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
|
86
|
-
compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
|
87
|
-
compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
|
88
|
-
compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
|
89
|
-
compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
|
90
|
-
compile 'log4j:log4j:1.2.17'
|
91
|
-
compile 'org.slf4j:slf4j-api:1.7.10'
|
92
|
-
compile 'org.slf4j:slf4j-log4j12:1.7.10'
|
93
|
-
compile 'org.jruby:jruby-complete:1.7.19'
|
94
|
-
compile 'com.google.code.findbugs:annotations:3.0.0'
|
95
|
-
compile 'org.yaml:snakeyaml:1.14'
|
96
|
-
compile 'javax.validation:validation-api:1.1.0.Final'
|
97
|
-
compile 'org.apache.bval:bval-jsr303:0.5'
|
98
|
-
compile 'io.airlift:slice:0.9'
|
99
|
-
compile 'joda-time:joda-time:2.7'
|
100
|
-
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
101
|
-
compile 'com.ibm.icu:icu4j:54.1.1'
|
102
|
-
|
103
|
-
testCompile 'junit:junit:4.12'
|
104
|
-
}
|
82
|
+
dependencies {
|
83
|
+
testCompile 'junit:junit:4.12'
|
84
|
+
}
|
105
85
|
|
106
|
-
gradle.projectsEvaluated {
|
107
86
|
tasks.withType(JavaCompile) {
|
108
87
|
options.compilerArgs << "-Xlint:unchecked" //<< "-Xlint:deprecation"
|
109
88
|
}
|
@@ -113,39 +92,36 @@ subprojects {
|
|
113
92
|
html.enabled = true
|
114
93
|
}
|
115
94
|
}
|
116
|
-
}
|
117
95
|
|
118
|
-
|
119
|
-
|
120
|
-
|
96
|
+
findbugs {
|
97
|
+
ignoreFailures = true
|
98
|
+
}
|
121
99
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
100
|
+
javadoc {
|
101
|
+
options {
|
102
|
+
locale = 'en_US'
|
103
|
+
encoding = 'UTF-8'
|
104
|
+
}
|
126
105
|
}
|
127
|
-
}
|
128
106
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
107
|
+
// add javadoc/source jar tasks as artifacts to be released
|
108
|
+
task sourcesJar(type: Jar, dependsOn: classes) {
|
109
|
+
classifier = 'sources'
|
110
|
+
from sourceSets.main.allSource
|
111
|
+
}
|
112
|
+
task javadocJar(type: Jar, dependsOn: javadoc) {
|
113
|
+
classifier = 'javadoc'
|
114
|
+
from javadoc.destinationDir
|
115
|
+
}
|
116
|
+
artifacts {
|
117
|
+
archives sourcesJar, javadocJar
|
118
|
+
}
|
140
119
|
}
|
141
120
|
|
142
121
|
publishing {
|
143
122
|
publications {
|
144
|
-
if (
|
145
|
-
|
146
|
-
groupId project.group
|
147
|
-
artifactId project.name
|
148
|
-
version project.version
|
123
|
+
if (release_projects.contains(project)) {
|
124
|
+
bintrayMavenRelease(MavenPublication) {
|
149
125
|
from components.java
|
150
126
|
artifact sourcesJar
|
151
127
|
artifact javadocJar
|
@@ -155,6 +131,28 @@ subprojects {
|
|
155
131
|
}
|
156
132
|
}
|
157
133
|
|
134
|
+
//
|
135
|
+
// classpath task
|
136
|
+
//
|
137
|
+
task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
|
138
|
+
clean { delete 'classpath' }
|
139
|
+
|
140
|
+
//
|
141
|
+
// cli task
|
142
|
+
//
|
143
|
+
task cli(dependsOn: ':embulk-cli:shadowJar') << {
|
144
|
+
file('pkg').mkdirs()
|
145
|
+
File f = file("pkg/embulk-${project.version}.jar")
|
146
|
+
f.write('''\
|
147
|
+
#!/bin/sh
|
148
|
+
exec java -jar "$0" "$@"
|
149
|
+
exit 127
|
150
|
+
''')
|
151
|
+
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
152
|
+
f.setExecutable(true)
|
153
|
+
}
|
154
|
+
bintrayUpload.dependsOn(['cli'])
|
155
|
+
|
158
156
|
project(':embulk-cli') {
|
159
157
|
apply plugin: 'com.github.johnrengelman.shadow'
|
160
158
|
|
@@ -167,31 +165,19 @@ project(':embulk-cli') {
|
|
167
165
|
'Specification-Version': project.version,
|
168
166
|
'Main-Class': 'org.embulk.cli.Main'
|
169
167
|
}
|
170
|
-
append("${
|
168
|
+
append("${rootProject.projectDir}/COPYING")
|
171
169
|
}
|
172
170
|
|
173
171
|
task classpath(type: Copy) {
|
174
|
-
doFirst { file("${
|
172
|
+
doFirst { file("${rootProject.projectDir}/classpath").mkdirs() }
|
175
173
|
from configurations.runtime
|
176
|
-
into "${
|
174
|
+
into "${rootProject.projectDir}/classpath"
|
177
175
|
}
|
178
176
|
}
|
179
177
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
task cli(dependsOn: ':embulk-cli:shadowJar') << {
|
184
|
-
file('pkg').mkdirs()
|
185
|
-
File f = file("pkg/embulk-${project.version}.jar")
|
186
|
-
f.write('''\
|
187
|
-
#!/bin/sh
|
188
|
-
exec java -jar "$0" "$@"
|
189
|
-
exit 127
|
190
|
-
''')
|
191
|
-
f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
|
192
|
-
f.setExecutable(true)
|
193
|
-
}
|
194
|
-
|
178
|
+
//
|
179
|
+
// gem task
|
180
|
+
//
|
195
181
|
import com.github.jrubygradle.JRubyExec
|
196
182
|
task gem(type: JRubyExec) {
|
197
183
|
jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'build'
|
@@ -201,6 +187,18 @@ task gem(type: JRubyExec) {
|
|
201
187
|
gem.dependsOn('gemspec')
|
202
188
|
gem.dependsOn('classpath')
|
203
189
|
|
190
|
+
//
|
191
|
+
// rubyGemsUpload task
|
192
|
+
//
|
193
|
+
task rubyGemsUpload(type: JRubyExec, dependsOn: ["gem"]) {
|
194
|
+
jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'push'
|
195
|
+
script "pkg/embulk-${project.version}.gem"
|
196
|
+
}
|
197
|
+
gem.dependsOn('gemspec')
|
198
|
+
|
199
|
+
//
|
200
|
+
// releaseCheck and release tasks
|
201
|
+
//
|
204
202
|
task releaseCheck << {
|
205
203
|
if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
|
206
204
|
throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
|
@@ -215,21 +213,21 @@ task releaseCheck << {
|
|
215
213
|
if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
|
216
214
|
throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
|
217
215
|
}
|
218
|
-
|
216
|
+
// TODO check git-ls-files includes release-<version>.rst file
|
217
|
+
println "Ready. Run 'release' task."
|
219
218
|
}
|
220
219
|
|
221
|
-
task release(dependsOn: ["
|
220
|
+
task release(dependsOn: ["releaseCheck", "bintrayUpload", "rubyGemsUpload"]) << {
|
222
221
|
println """
|
223
|
-
|
222
|
+
Manual operations:
|
224
223
|
|
225
|
-
git commit -
|
224
|
+
git commit -am v${project.version}
|
226
225
|
git tag v${project.version}
|
227
|
-
gem push pkg/embulk-${project.version}.gem"
|
228
|
-
./gradlew bintrayUpload
|
229
|
-
open "https://bintray.com/embulk/maven/embulk/${project.version}/view" # and upload pkg/embulk-${project.version}.jar
|
230
226
|
|
231
227
|
"""
|
232
228
|
}
|
229
|
+
bintrayUpload.mustRunAfter('releaseCheck')
|
230
|
+
rubyGemsUpload.mustRunAfter('releaseCheck')
|
233
231
|
|
234
232
|
task gemspec << {
|
235
233
|
file('build').mkdirs()
|
data/embulk-core/build.gradle
CHANGED
@@ -1,6 +1,41 @@
|
|
1
|
+
// include ruby scripts
|
1
2
|
sourceSets {
|
2
3
|
main.resources {
|
3
|
-
srcDirs
|
4
|
-
srcDirs "${parent.projectDir}/lib"
|
4
|
+
srcDirs "${rootProject.projectDir}/lib"
|
5
5
|
}
|
6
6
|
}
|
7
|
+
|
8
|
+
configurations {
|
9
|
+
// com.google.inject:guice depends on asm and cglib but version of the libraries conflict
|
10
|
+
// with ones bundled in jruby-complete and cause bytecode compatibility error
|
11
|
+
compile.exclude group: 'asm', module: 'asm'
|
12
|
+
compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
|
13
|
+
}
|
14
|
+
|
15
|
+
// determine which dependencies have updates: $ gradle dependencyUpdates
|
16
|
+
dependencies {
|
17
|
+
compile 'com.google.guava:guava:18.0'
|
18
|
+
compile 'com.google.inject:guice:3.0'
|
19
|
+
compile 'com.google.inject.extensions:guice-multibindings:3.0'
|
20
|
+
compile 'javax.inject:javax.inject:1'
|
21
|
+
compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
|
22
|
+
compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
|
23
|
+
compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
|
24
|
+
compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
|
25
|
+
compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
|
26
|
+
compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
|
27
|
+
compile 'log4j:log4j:1.2.17'
|
28
|
+
compile 'org.slf4j:slf4j-api:1.7.10'
|
29
|
+
compile 'org.slf4j:slf4j-log4j12:1.7.10'
|
30
|
+
compile 'org.jruby:jruby-complete:1.7.19'
|
31
|
+
compile 'com.google.code.findbugs:annotations:3.0.0'
|
32
|
+
compile 'org.yaml:snakeyaml:1.14'
|
33
|
+
compile 'javax.validation:validation-api:1.1.0.Final'
|
34
|
+
compile 'org.apache.bval:bval-jsr303:0.5'
|
35
|
+
compile 'io.airlift:slice:0.9'
|
36
|
+
compile 'joda-time:joda-time:2.7'
|
37
|
+
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
38
|
+
|
39
|
+
// for embulk/guess/charset.rb
|
40
|
+
compile 'com.ibm.icu:icu4j:54.1.1'
|
41
|
+
}
|
@@ -169,7 +169,11 @@ public class Runner
|
|
169
169
|
|
170
170
|
String yml = writeNextConfig(options.getNextConfigOutputPath(), config, configDiff);
|
171
171
|
System.err.println(yml);
|
172
|
-
|
172
|
+
if (options.getNextConfigOutputPath() == null) {
|
173
|
+
System.out.println("Use -o PATH option to write the guessed config file to a file.");
|
174
|
+
} else {
|
175
|
+
System.out.println("Created '"+options.getNextConfigOutputPath()+"' file.");
|
176
|
+
}
|
173
177
|
}
|
174
178
|
|
175
179
|
private void checkFileWritable(String path)
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,34 @@
|
|
1
|
+
Release 0.4.3
|
2
|
+
==================================
|
3
|
+
|
4
|
+
CLI
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* All subcommands show current time with timezone and embulk's version number at the beginning.
|
8
|
+
|
9
|
+
Plugin API Changes
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* ``Thread.currentThread().getContextClassLoader()`` no longer returns JRuby's classloader. It returns null so that dependent libraries fall back to the appropriate ``this.getClass().getClassLoader()`` call.
|
13
|
+
|
14
|
+
Built-in plugins
|
15
|
+
------------------
|
16
|
+
|
17
|
+
* ``guess/csv`` guesses ``escape`` and ``null_string`` options.
|
18
|
+
* Fixed ``guess/csv`` failing when the CSV file includes a timestamp value with a timezone (@kinyuka++).
|
19
|
+
* Fixed memory leak at ``output/file`` (@akirakw++).
|
20
|
+
* Fixed ``input/file`` loading unnecessary files when it lists files from ``.``.
|
21
|
+
|
22
|
+
General Changes
|
23
|
+
------------------
|
24
|
+
|
25
|
+
* embulk-cli artifact is no longer released.
|
26
|
+
* embulk-standards artifact doesn't directly depend on dependencies of embulk-core.
|
27
|
+
* Updated the build script.
|
28
|
+
|
29
|
+
* ``bintrayUpload`` task uploads embulk-<version>.jar.
|
30
|
+
* ``release`` task actually releases gem to RubyGems and publishes jar files to Bintray.
|
31
|
+
|
32
|
+
Release Date
|
33
|
+
------------------
|
34
|
+
2015-02-17
|
@@ -51,6 +51,8 @@ public class LocalFileInputPlugin
|
|
51
51
|
|
52
52
|
private final Logger log = Exec.getLogger(getClass());
|
53
53
|
|
54
|
+
private final static Path CURRENT_DIR = Paths.get(".").normalize();
|
55
|
+
|
54
56
|
@Override
|
55
57
|
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
56
58
|
{
|
@@ -92,27 +94,58 @@ public class LocalFileInputPlugin
|
|
92
94
|
} else {
|
93
95
|
fileNamePrefix = pathPrefix.getFileName().toString();
|
94
96
|
Path d = pathPrefix.getParent();
|
95
|
-
directory = (d == null ?
|
97
|
+
directory = (d == null ? CURRENT_DIR : d);
|
96
98
|
}
|
97
99
|
|
98
100
|
final ImmutableList.Builder<String> builder = ImmutableList.builder();
|
99
101
|
final String lastPath = task.getLastPath().orNull();
|
100
102
|
try {
|
101
|
-
log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory, fileNamePrefix);
|
103
|
+
log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);
|
102
104
|
Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
|
103
105
|
@Override
|
104
|
-
public FileVisitResult
|
106
|
+
public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
|
107
|
+
{
|
108
|
+
if (path.equals(directory)) {
|
109
|
+
return FileVisitResult.CONTINUE;
|
110
|
+
} else if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
|
111
|
+
return FileVisitResult.SKIP_SUBTREE;
|
112
|
+
} else {
|
113
|
+
Path parent = path.getParent();
|
114
|
+
if (parent == null) {
|
115
|
+
parent = CURRENT_DIR;
|
116
|
+
}
|
117
|
+
if (parent.equals(directory)) {
|
118
|
+
if (path.getFileName().toString().startsWith(fileNamePrefix)) {
|
119
|
+
return FileVisitResult.CONTINUE;
|
120
|
+
} else {
|
121
|
+
return FileVisitResult.SKIP_SUBTREE;
|
122
|
+
}
|
123
|
+
} else {
|
124
|
+
return FileVisitResult.CONTINUE;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
@Override
|
130
|
+
public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
|
105
131
|
{
|
106
|
-
if (lastPath
|
107
|
-
|
132
|
+
if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
|
133
|
+
return FileVisitResult.CONTINUE;
|
134
|
+
} else {
|
135
|
+
Path parent = path.getParent();
|
136
|
+
if (parent == null) {
|
137
|
+
parent = CURRENT_DIR;
|
138
|
+
}
|
139
|
+
if (parent.equals(directory)) {
|
108
140
|
if (path.getFileName().toString().startsWith(fileNamePrefix)) {
|
109
141
|
builder.add(path.toString());
|
142
|
+
return FileVisitResult.CONTINUE;
|
110
143
|
}
|
111
144
|
} else {
|
112
145
|
builder.add(path.toString());
|
113
146
|
}
|
147
|
+
return FileVisitResult.CONTINUE;
|
114
148
|
}
|
115
|
-
return FileVisitResult.CONTINUE;
|
116
149
|
}
|
117
150
|
});
|
118
151
|
} catch (IOException ex) {
|
@@ -1,3 +1,7 @@
|
|
1
|
+
# reset context class loader set by org.jruby.Main.main to nil. embulk manages
|
2
|
+
# multiple classloaders. default classloader should be Plugin.class.getClassLoader().
|
3
|
+
java.lang.Thread.current_thread.set_context_class_loader(nil)
|
4
|
+
|
1
5
|
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
2
6
|
bundle_path = nil if bundle_path.empty?
|
3
7
|
|
@@ -13,8 +13,8 @@ module Embulk
|
|
13
13
|
id,account,time,purchase,comment
|
14
14
|
1,32864,2015-01-27 19:23:49,20150127,embulk
|
15
15
|
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
|
16
|
-
3,27559,2015-01-28 02:20:02,20150128,
|
17
|
-
4,11270,2015-01-29 11:54:36,20150129,
|
16
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
|
17
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL
|
18
18
|
EOF
|
19
19
|
end
|
20
20
|
|
@@ -17,8 +17,9 @@ module Embulk
|
|
17
17
|
# to make sure org.embulk.jruby.JRubyScriptingModule can require 'embulk/java/bootstrap'
|
18
18
|
$LOAD_PATH << Embulk.home('lib')
|
19
19
|
|
20
|
+
require 'embulk/version'
|
21
|
+
|
20
22
|
if argv.include?('--version')
|
21
|
-
require 'embulk/version'
|
22
23
|
puts "embulk #{Embulk::VERSION}"
|
23
24
|
exit 0
|
24
25
|
end
|
@@ -31,6 +32,8 @@ module Embulk
|
|
31
32
|
require 'optparse'
|
32
33
|
op = OptionParser.new
|
33
34
|
|
35
|
+
puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S %Z")}: Embulk v#{Embulk::VERSION}"
|
36
|
+
|
34
37
|
load_paths = []
|
35
38
|
classpaths = []
|
36
39
|
classpath_separator = java.io.File.pathSeparator
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -13,6 +13,17 @@ module Embulk
|
|
13
13
|
"\"", "'"
|
14
14
|
]
|
15
15
|
|
16
|
+
ESCAPE_CANDIDATES = [
|
17
|
+
"\\"
|
18
|
+
]
|
19
|
+
|
20
|
+
NULL_STRING_CANDIDATES = [
|
21
|
+
"null",
|
22
|
+
"NULL",
|
23
|
+
"#N/A",
|
24
|
+
"\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
|
25
|
+
]
|
26
|
+
|
16
27
|
# CsvParserPlugin.TRUE_STRINGS
|
17
28
|
TRUE_STRINGS = Hash[*%w[
|
18
29
|
true True TRUE
|
@@ -35,6 +46,13 @@ module Embulk
|
|
35
46
|
quote = guess_quote(sample_lines, delim)
|
36
47
|
parser_guessed["quote"] = quote ? quote : ''
|
37
48
|
|
49
|
+
escape = guess_escape(sample_lines, delim, quote)
|
50
|
+
parser_guessed["escape"] = escape ? escape : ''
|
51
|
+
|
52
|
+
null_string = guess_null_string(sample_lines, delim)
|
53
|
+
parser_guessed["null_string"] = null_string if null_string
|
54
|
+
# don't even set null_string to avoid confusion of null and 'null' in YAML format
|
55
|
+
|
38
56
|
sample_records = sample_lines.map {|line| line.split(delim) } # TODO use CsvTokenizer
|
39
57
|
first_types = guess_field_types(sample_records[0, 1])
|
40
58
|
other_types = guess_field_types(sample_records[1..-1])
|
@@ -119,6 +137,32 @@ module Embulk
|
|
119
137
|
end
|
120
138
|
end
|
121
139
|
|
140
|
+
def guess_escape(sample_lines, delim, optional_quote)
|
141
|
+
guessed = ESCAPE_CANDIDATES.map do |str|
|
142
|
+
if optional_quote
|
143
|
+
regexp = /#{Regexp.quote(str)}(?:#{Regexp.quote(delim)}|#{Regexp.quote(optional_quote)})/
|
144
|
+
else
|
145
|
+
regexp = /#{Regexp.quote(str)}#{Regexp.quote(delim)}/
|
146
|
+
end
|
147
|
+
counts = sample_lines.map {|line| line.scan(regexp).count }
|
148
|
+
count = counts.inject(0) {|r,c| r + c }
|
149
|
+
[str, count]
|
150
|
+
end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
|
151
|
+
found = guessed.first
|
152
|
+
return found ? found[0] : nil
|
153
|
+
end
|
154
|
+
|
155
|
+
def guess_null_string(sample_lines, delim)
|
156
|
+
guessed = NULL_STRING_CANDIDATES.map do |str|
|
157
|
+
regexp = /(?:^|#{Regexp.quote(delim)})#{Regexp.quote(str)}(?:$|#{Regexp.quote(delim)})/
|
158
|
+
counts = sample_lines.map {|line| line.scan(regexp).count }
|
159
|
+
count = counts.inject(0) {|r,c| r + c }
|
160
|
+
[str, count]
|
161
|
+
end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
|
162
|
+
found = guessed.first
|
163
|
+
return found ? found[0] : nil
|
164
|
+
end
|
165
|
+
|
122
166
|
def guess_field_types(field_lines)
|
123
167
|
column_lines = []
|
124
168
|
field_lines.each do |fields|
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -161,7 +161,6 @@ files:
|
|
161
161
|
- embulk-core/src/main/java/org/embulk/plugin/PluginSource.java
|
162
162
|
- embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java
|
163
163
|
- embulk-core/src/main/java/org/embulk/plugin/PluginType.java
|
164
|
-
- embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java
|
165
164
|
- embulk-core/src/main/java/org/embulk/spi/Buffer.java
|
166
165
|
- embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java
|
167
166
|
- embulk-core/src/main/java/org/embulk/spi/Column.java
|
@@ -268,6 +267,7 @@ files:
|
|
268
267
|
- embulk-docs/src/release/release-0.4.0.rst
|
269
268
|
- embulk-docs/src/release/release-0.4.1.rst
|
270
269
|
- embulk-docs/src/release/release-0.4.2.rst
|
270
|
+
- embulk-docs/src/release/release-0.4.3.rst
|
271
271
|
- embulk-standards/build.gradle
|
272
272
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
273
273
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -362,8 +362,8 @@ files:
|
|
362
362
|
- classpath/bval-jsr303-0.5.jar
|
363
363
|
- classpath/commons-beanutils-core-1.8.3.jar
|
364
364
|
- classpath/commons-lang3-3.1.jar
|
365
|
-
- classpath/embulk-core-0.4.2.jar
|
366
|
-
- classpath/embulk-standards-0.4.2.jar
|
365
|
+
- classpath/embulk-core-0.4.3.jar
|
366
|
+
- classpath/embulk-standards-0.4.3.jar
|
367
367
|
- classpath/guava-18.0.jar
|
368
368
|
- classpath/guice-3.0.jar
|
369
369
|
- classpath/guice-multibindings-3.0.jar
|
@@ -1,19 +0,0 @@
|
|
1
|
-
package org.embulk.plugin;
|
2
|
-
|
3
|
-
public class SetThreadContextClassLoader
|
4
|
-
implements AutoCloseable
|
5
|
-
{
|
6
|
-
private final ClassLoader original;
|
7
|
-
|
8
|
-
public SetThreadContextClassLoader(ClassLoader classLoader)
|
9
|
-
{
|
10
|
-
this.original = Thread.currentThread().getContextClassLoader();
|
11
|
-
Thread.currentThread().setContextClassLoader(classLoader);
|
12
|
-
}
|
13
|
-
|
14
|
-
@Override
|
15
|
-
public void close()
|
16
|
-
{
|
17
|
-
Thread.currentThread().setContextClassLoader(original);
|
18
|
-
}
|
19
|
-
}
|