embulk 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d63d08592793b691be7f691d842f73d18a202d45
4
- data.tar.gz: 5a14e328213b2a97fe6e0abaa044428bb5caa0fd
3
+ metadata.gz: b24d1b7fd55459a318f9dbce2fff9b976f40738b
4
+ data.tar.gz: 88573c322b4d3e32f4fb11775122db1c1b28f3d2
5
5
  SHA512:
6
- metadata.gz: 8e447c2ae251e6ef3309c0862344d10074797389ea2c55bce2e2302d82eea2425345316efdf7dae9adcd9f08de5b0132c04e6cad43edaef29ac3cf635ad640f8
7
- data.tar.gz: 0b33b0a4f2dc7911d1432828e303c464c8b25bda00dc9eb3df8c9fb905c85011c30132292c581163fb557d2e2eaeb3a32a8bbd61bd486509361b4464aa2477ed
6
+ metadata.gz: 3179a8555d542ae38a785ad308a9b44560ae66889f263c932675e4ef5050b719636126ec0a960c6dba098030e269c3b0dd7b34f841d278857464c143ea0a2940
7
+ data.tar.gz: 20772970a71e622473f119ebc4ee9316fdbfff48e96948345fb6a001342d727a70ecd525e217fac98eb0838f2e20ccebcfd6d48603c945a0d9b096f7070ea92a
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  target/
2
2
  build/
3
+ pkg/
3
4
  *.iml
4
5
  *~
5
6
  ._*
data/README.md CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
24
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
25
25
 
26
26
  ```
27
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.2.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.3.jar -O embulk.jar
28
28
  java -jar embulk.jar --help
29
29
  ```
30
30
 
data/bin/embulk CHANGED
@@ -13,7 +13,7 @@ while i = ARGV.find_index {|arg| arg =~ /^\-D/ }
13
13
  end
14
14
  ARGV.slice!(i, 2)
15
15
  else
16
- java_args << ARGV[i]
16
+ java_args << ARGV[i][2..-1]
17
17
  ARGV.slice!(i)
18
18
  end
19
19
  end
@@ -4,19 +4,22 @@ plugins {
4
4
  id 'com.github.ben-manes.versions' version '0.7'
5
5
  id 'com.github.jruby-gradle.base' version '0.1.5'
6
6
  id 'com.github.johnrengelman.shadow' version '1.2.0'
7
- id 'java'
8
7
  }
9
8
 
9
+ def java_projects = [project(":embulk-core"), project(":embulk-standards"), project(":embulk-cli")]
10
+ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
+
10
12
  allprojects {
11
13
  group = 'org.embulk'
12
- version = '0.4.2'
14
+ version = '0.4.3'
13
15
 
14
- apply plugin: 'maven' // install jar files to the local repo: $ gradle install
16
+ apply plugin: 'java'
15
17
  apply plugin: 'maven-publish'
16
18
  apply plugin: 'com.jfrog.bintray'
17
- apply plugin: 'java'
18
19
 
19
- // upload artifacts to Bintray: $ gradle bintrayUpload
20
+ //
21
+ // bintrayUpload task
22
+ //
20
23
  bintray {
21
24
  // write your bintray user name and api key to the ~/.gradle/gradle.properties file:
22
25
  // bintray_user=frsyuki
@@ -24,7 +27,13 @@ allprojects {
24
27
  user = project.hasProperty('bintray_user') ? bintray_user : ''
25
28
  key = project.hasProperty('bintray_api_key') ? bintray_api_key : ''
26
29
 
27
- publications = ['mavenJava']
30
+ publications = ['bintrayMavenRelease']
31
+
32
+ filesSpec {
33
+ // include embulk-<version>.jar built by 'cli' task
34
+ from "pkg/embulk-${project.version}.jar"
35
+ into "embulk-${project.version}.jar"
36
+ }
28
37
 
29
38
  dryRun = false
30
39
  publish = false // TODO automate uploading embulk.jar and make this true
@@ -56,54 +65,24 @@ allprojects {
56
65
  }
57
66
 
58
67
  subprojects {
59
- apply plugin: 'findbugs'
60
- apply plugin: 'jacoco'
61
-
62
- repositories {
63
- mavenCentral()
64
- jcenter()
65
- }
66
-
67
- compileJava.options.encoding = 'UTF-8' // source encoding
68
- sourceCompatibility = 1.7
69
- targetCompatibility = 1.7
68
+ if (java_projects.contains(project)) {
69
+ apply plugin: 'maven'
70
+ apply plugin: 'findbugs'
71
+ apply plugin: 'jacoco'
72
+
73
+ repositories {
74
+ mavenCentral()
75
+ jcenter()
76
+ }
70
77
 
71
- configurations {
72
- // guice depends on asm and cglib but version of the libraries conflict
73
- // with ones bundled in jruby-complete and cause bytecode compatibility error
74
- compile.exclude group: 'asm', module: 'asm'
75
- compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
76
- }
78
+ compileJava.options.encoding = 'UTF-8' // source encoding
79
+ sourceCompatibility = 1.7
80
+ targetCompatibility = 1.7
77
81
 
78
- // determine which dependencies have updates: $ gradle dependencyUpdates
79
- dependencies {
80
- compile 'com.google.guava:guava:18.0'
81
- compile 'com.google.inject:guice:3.0'
82
- compile 'com.google.inject.extensions:guice-multibindings:3.0'
83
- compile 'javax.inject:javax.inject:1'
84
- compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
85
- compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
86
- compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
87
- compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
88
- compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
89
- compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
90
- compile 'log4j:log4j:1.2.17'
91
- compile 'org.slf4j:slf4j-api:1.7.10'
92
- compile 'org.slf4j:slf4j-log4j12:1.7.10'
93
- compile 'org.jruby:jruby-complete:1.7.19'
94
- compile 'com.google.code.findbugs:annotations:3.0.0'
95
- compile 'org.yaml:snakeyaml:1.14'
96
- compile 'javax.validation:validation-api:1.1.0.Final'
97
- compile 'org.apache.bval:bval-jsr303:0.5'
98
- compile 'io.airlift:slice:0.9'
99
- compile 'joda-time:joda-time:2.7'
100
- compile 'io.netty:netty-buffer:5.0.0.Alpha1'
101
- compile 'com.ibm.icu:icu4j:54.1.1'
102
-
103
- testCompile 'junit:junit:4.12'
104
- }
82
+ dependencies {
83
+ testCompile 'junit:junit:4.12'
84
+ }
105
85
 
106
- gradle.projectsEvaluated {
107
86
  tasks.withType(JavaCompile) {
108
87
  options.compilerArgs << "-Xlint:unchecked" //<< "-Xlint:deprecation"
109
88
  }
@@ -113,39 +92,36 @@ subprojects {
113
92
  html.enabled = true
114
93
  }
115
94
  }
116
- }
117
95
 
118
- findbugs {
119
- ignoreFailures = true
120
- }
96
+ findbugs {
97
+ ignoreFailures = true
98
+ }
121
99
 
122
- javadoc {
123
- options {
124
- locale = 'en_US'
125
- encoding = 'UTF-8'
100
+ javadoc {
101
+ options {
102
+ locale = 'en_US'
103
+ encoding = 'UTF-8'
104
+ }
126
105
  }
127
- }
128
106
 
129
- // add javadoc/source jar tasks as artifacts to be released
130
- task sourcesJar(type: Jar, dependsOn: classes) {
131
- classifier = 'sources'
132
- from sourceSets.main.allSource
133
- }
134
- task javadocJar(type: Jar, dependsOn: javadoc) {
135
- classifier = 'javadoc'
136
- from javadoc.destinationDir
137
- }
138
- artifacts {
139
- archives sourcesJar, javadocJar
107
+ // add javadoc/source jar tasks as artifacts to be released
108
+ task sourcesJar(type: Jar, dependsOn: classes) {
109
+ classifier = 'sources'
110
+ from sourceSets.main.allSource
111
+ }
112
+ task javadocJar(type: Jar, dependsOn: javadoc) {
113
+ classifier = 'javadoc'
114
+ from javadoc.destinationDir
115
+ }
116
+ artifacts {
117
+ archives sourcesJar, javadocJar
118
+ }
140
119
  }
141
120
 
142
121
  publishing {
143
122
  publications {
144
- if (!project.name.equals("embulk-docs")) {
145
- mavenJava(MavenPublication) {
146
- groupId project.group
147
- artifactId project.name
148
- version project.version
123
+ if (release_projects.contains(project)) {
124
+ bintrayMavenRelease(MavenPublication) {
149
125
  from components.java
150
126
  artifact sourcesJar
151
127
  artifact javadocJar
@@ -155,6 +131,28 @@ subprojects {
155
131
  }
156
132
  }
157
133
 
134
+ //
135
+ // classpath task
136
+ //
137
+ task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
138
+ clean { delete 'classpath' }
139
+
140
+ //
141
+ // cli task
142
+ //
143
+ task cli(dependsOn: ':embulk-cli:shadowJar') << {
144
+ file('pkg').mkdirs()
145
+ File f = file("pkg/embulk-${project.version}.jar")
146
+ f.write('''\
147
+ #!/bin/sh
148
+ exec java -jar "$0" "$@"
149
+ exit 127
150
+ ''')
151
+ f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
152
+ f.setExecutable(true)
153
+ }
154
+ bintrayUpload.dependsOn(['cli'])
155
+
158
156
  project(':embulk-cli') {
159
157
  apply plugin: 'com.github.johnrengelman.shadow'
160
158
 
@@ -167,31 +165,19 @@ project(':embulk-cli') {
167
165
  'Specification-Version': project.version,
168
166
  'Main-Class': 'org.embulk.cli.Main'
169
167
  }
170
- append("${parent.projectDir}/COPYING")
168
+ append("${rootProject.projectDir}/COPYING")
171
169
  }
172
170
 
173
171
  task classpath(type: Copy) {
174
- doFirst { file("${parent.projectDir}/classpath").mkdirs() }
172
+ doFirst { file("${rootProject.projectDir}/classpath").mkdirs() }
175
173
  from configurations.runtime
176
- into "${parent.projectDir}/classpath"
174
+ into "${rootProject.projectDir}/classpath"
177
175
  }
178
176
  }
179
177
 
180
- task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
181
- clean { delete 'classpath' }
182
-
183
- task cli(dependsOn: ':embulk-cli:shadowJar') << {
184
- file('pkg').mkdirs()
185
- File f = file("pkg/embulk-${project.version}.jar")
186
- f.write('''\
187
- #!/bin/sh
188
- exec java -jar "$0" "$@"
189
- exit 127
190
- ''')
191
- f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
192
- f.setExecutable(true)
193
- }
194
-
178
+ //
179
+ // gem task
180
+ //
195
181
  import com.github.jrubygradle.JRubyExec
196
182
  task gem(type: JRubyExec) {
197
183
  jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'build'
@@ -201,6 +187,18 @@ task gem(type: JRubyExec) {
201
187
  gem.dependsOn('gemspec')
202
188
  gem.dependsOn('classpath')
203
189
 
190
+ //
191
+ // rubyGemsUpload task
192
+ //
193
+ task rubyGemsUpload(type: JRubyExec, dependsOn: ["gem"]) {
194
+ jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'push'
195
+ script "pkg/embulk-${project.version}.gem"
196
+ }
197
+ gem.dependsOn('gemspec')
198
+
199
+ //
200
+ // releaseCheck and release tasks
201
+ //
204
202
  task releaseCheck << {
205
203
  if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
206
204
  throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
@@ -215,21 +213,21 @@ task releaseCheck << {
215
213
  if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
216
214
  throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
217
215
  }
218
- println "Ready. Run ./gradlew release"
216
+ // TODO check git-ls-files includes release-<version>.rst file
217
+ println "Ready. Run 'release' task."
219
218
  }
220
219
 
221
- task release(dependsOn: ["cli", "gem"]) << {
220
+ task release(dependsOn: ["releaseCheck", "bintrayUpload", "rubyGemsUpload"]) << {
222
221
  println """
223
- manual operations:
222
+ Manual operations:
224
223
 
225
- git commit -a -m v${project.version}
224
+ git commit -am v${project.version}
226
225
  git tag v${project.version}
227
- gem push pkg/embulk-${project.version}.gem"
228
- ./gradlew bintrayUpload
229
- open "https://bintray.com/embulk/maven/embulk/${project.version}/view" # and upload pkg/embulk-${project.version}.jar
230
226
 
231
227
  """
232
228
  }
229
+ bintrayUpload.mustRunAfter('releaseCheck')
230
+ rubyGemsUpload.mustRunAfter('releaseCheck')
233
231
 
234
232
  task gemspec << {
235
233
  file('build').mkdirs()
@@ -1,6 +1,41 @@
1
+ // include ruby scripts
1
2
  sourceSets {
2
3
  main.resources {
3
- srcDirs 'src/main/resources'
4
- srcDirs "${parent.projectDir}/lib"
4
+ srcDirs "${rootProject.projectDir}/lib"
5
5
  }
6
6
  }
7
+
8
+ configurations {
9
+ // com.google.inject:guice depends on asm and cglib but version of the libraries conflict
10
+ // with ones bundled in jruby-complete and cause bytecode compatibility error
11
+ compile.exclude group: 'asm', module: 'asm'
12
+ compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
13
+ }
14
+
15
+ // determine which dependencies have updates: $ gradle dependencyUpdates
16
+ dependencies {
17
+ compile 'com.google.guava:guava:18.0'
18
+ compile 'com.google.inject:guice:3.0'
19
+ compile 'com.google.inject.extensions:guice-multibindings:3.0'
20
+ compile 'javax.inject:javax.inject:1'
21
+ compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
22
+ compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
23
+ compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
24
+ compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
25
+ compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
26
+ compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
27
+ compile 'log4j:log4j:1.2.17'
28
+ compile 'org.slf4j:slf4j-api:1.7.10'
29
+ compile 'org.slf4j:slf4j-log4j12:1.7.10'
30
+ compile 'org.jruby:jruby-complete:1.7.19'
31
+ compile 'com.google.code.findbugs:annotations:3.0.0'
32
+ compile 'org.yaml:snakeyaml:1.14'
33
+ compile 'javax.validation:validation-api:1.1.0.Final'
34
+ compile 'org.apache.bval:bval-jsr303:0.5'
35
+ compile 'io.airlift:slice:0.9'
36
+ compile 'joda-time:joda-time:2.7'
37
+ compile 'io.netty:netty-buffer:5.0.0.Alpha1'
38
+
39
+ // for embulk/guess/charset.rb
40
+ compile 'com.ibm.icu:icu4j:54.1.1'
41
+ }
@@ -169,7 +169,11 @@ public class Runner
169
169
 
170
170
  String yml = writeNextConfig(options.getNextConfigOutputPath(), config, configDiff);
171
171
  System.err.println(yml);
172
- System.out.println("Created "+options.getNextConfigOutputPath());
172
+ if (options.getNextConfigOutputPath() == null) {
173
+ System.out.println("Use -o PATH option to write the guessed config file to a file.");
174
+ } else {
175
+ System.out.println("Created '"+options.getNextConfigOutputPath()+"' file.");
176
+ }
173
177
  }
174
178
 
175
179
  private void checkFileWritable(String path)
@@ -13,4 +13,5 @@ Release Notes
13
13
  release/release-0.4.0
14
14
  release/release-0.4.1
15
15
  release/release-0.4.2
16
+ release/release-0.4.3
16
17
 
@@ -0,0 +1,34 @@
1
+ Release 0.4.3
2
+ ==================================
3
+
4
+ CLI
5
+ ------------------
6
+
7
+ * All subcommands show current time with timezone and embulk's version number at the beginning.
8
+
9
+ Plugin API Changes
10
+ ------------------
11
+
12
+ * ``Thread.currentThread().getContextClassLoader()`` no longer returns JRuby's classloader. It returns null so that dependent libraries fall back to the appropriate ``this.getClass().getClassLoader()`` call.
13
+
14
+ Built-in plugins
15
+ ------------------
16
+
17
+ * ``guess/csv`` guesses ``escape`` and ``null_string`` options.
18
+ * Fixed a bug where ``guess/csv`` fails if the CSV file includes a timestamp value with a timezone (@kinyuka++).
19
+ * Fixed memory leak at ``output/file`` (@akirakw++).
20
+ * Fixed a bug where ``input/file`` loads unnecessary files when it lists files from ``.``.
21
+
22
+ General Changes
23
+ ------------------
24
+
25
+ * embulk-cli artifact is no longer released.
26
+ * embulk-standards artifact doesn't directly depend on dependencies of embulk-core.
27
+ * Updated the build script.
28
+
29
+ * ``bintrayUpload`` task uploads embulk-<version>.jar.
30
+ * ``release`` task actually releases gem to RubyGems and publishes jar files to Bintray.
31
+
32
+ Release Date
33
+ ------------------
34
+ 2015-02-17
@@ -51,6 +51,8 @@ public class LocalFileInputPlugin
51
51
 
52
52
  private final Logger log = Exec.getLogger(getClass());
53
53
 
54
+ private final static Path CURRENT_DIR = Paths.get(".").normalize();
55
+
54
56
  @Override
55
57
  public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
56
58
  {
@@ -92,27 +94,58 @@ public class LocalFileInputPlugin
92
94
  } else {
93
95
  fileNamePrefix = pathPrefix.getFileName().toString();
94
96
  Path d = pathPrefix.getParent();
95
- directory = (d == null ? Paths.get(".") : d);
97
+ directory = (d == null ? CURRENT_DIR : d);
96
98
  }
97
99
 
98
100
  final ImmutableList.Builder<String> builder = ImmutableList.builder();
99
101
  final String lastPath = task.getLastPath().orNull();
100
102
  try {
101
- log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory, fileNamePrefix);
103
+ log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);
102
104
  Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
103
105
  @Override
104
- public FileVisitResult visitFile(Path path, BasicFileAttributes aAttrs)
106
+ public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
107
+ {
108
+ if (path.equals(directory)) {
109
+ return FileVisitResult.CONTINUE;
110
+ } else if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
111
+ return FileVisitResult.SKIP_SUBTREE;
112
+ } else {
113
+ Path parent = path.getParent();
114
+ if (parent == null) {
115
+ parent = CURRENT_DIR;
116
+ }
117
+ if (parent.equals(directory)) {
118
+ if (path.getFileName().toString().startsWith(fileNamePrefix)) {
119
+ return FileVisitResult.CONTINUE;
120
+ } else {
121
+ return FileVisitResult.SKIP_SUBTREE;
122
+ }
123
+ } else {
124
+ return FileVisitResult.CONTINUE;
125
+ }
126
+ }
127
+ }
128
+
129
+ @Override
130
+ public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
105
131
  {
106
- if (lastPath == null || path.toString().compareTo(lastPath) > 0) {
107
- if (path.getParent().equals(directory)) {
132
+ if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
133
+ return FileVisitResult.CONTINUE;
134
+ } else {
135
+ Path parent = path.getParent();
136
+ if (parent == null) {
137
+ parent = CURRENT_DIR;
138
+ }
139
+ if (parent.equals(directory)) {
108
140
  if (path.getFileName().toString().startsWith(fileNamePrefix)) {
109
141
  builder.add(path.toString());
142
+ return FileVisitResult.CONTINUE;
110
143
  }
111
144
  } else {
112
145
  builder.add(path.toString());
113
146
  }
147
+ return FileVisitResult.CONTINUE;
114
148
  }
115
- return FileVisitResult.CONTINUE;
116
149
  }
117
150
  });
118
151
  } catch (IOException ex) {
@@ -112,6 +112,8 @@ public class LocalFileOutputPlugin
112
112
  output.write(buffer.array(), buffer.offset(), buffer.limit());
113
113
  } catch (IOException ex) {
114
114
  throw new RuntimeException(ex);
115
+ } finally {
116
+ buffer.release();
115
117
  }
116
118
  }
117
119
 
@@ -1,3 +1,7 @@
1
+ # reset context class loader set by org.jruby.Main.main to nil. embulk manages
2
+ # multiple classloaders. default classloader should be Plugin.class.getClassLoader().
3
+ java.lang.Thread.current_thread.set_context_class_loader(nil)
4
+
1
5
  bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
2
6
  bundle_path = nil if bundle_path.empty?
3
7
 
@@ -13,8 +13,8 @@ module Embulk
13
13
  id,account,time,purchase,comment
14
14
  1,32864,2015-01-27 19:23:49,20150127,embulk
15
15
  2,14824,2015-01-27 19:01:23,20150127,embulk jruby
16
- 3,27559,2015-01-28 02:20:02,20150128,embulk core
17
- 4,11270,2015-01-29 11:54:36,20150129,"Embulk ""csv"" parser plugin"
16
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
17
+ 4,11270,2015-01-29 11:54:36,20150129,NULL
18
18
  EOF
19
19
  end
20
20
 
@@ -17,8 +17,9 @@ module Embulk
17
17
  # to make sure org.embulk.jruby.JRubyScriptingModule can require 'embulk/java/bootstrap'
18
18
  $LOAD_PATH << Embulk.home('lib')
19
19
 
20
+ require 'embulk/version'
21
+
20
22
  if argv.include?('--version')
21
- require 'embulk/version'
22
23
  puts "embulk #{Embulk::VERSION}"
23
24
  exit 0
24
25
  end
@@ -31,6 +32,8 @@ module Embulk
31
32
  require 'optparse'
32
33
  op = OptionParser.new
33
34
 
35
+ puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S %Z")}: Embulk v#{Embulk::VERSION}"
36
+
34
37
  load_paths = []
35
38
  classpaths = []
36
39
  classpath_separator = java.io.File.pathSeparator
@@ -4,7 +4,7 @@ TODO: Write short description here
4
4
 
5
5
  ## Overview
6
6
 
7
- * **Plugin type**: <%= embulk_category %>
7
+ * **Plugin type**: <%= display_category %>
8
8
  * **Load all or nothing**: yes
9
9
  * **Resume supported**: no
10
10
 
@@ -13,6 +13,17 @@ module Embulk
13
13
  "\"", "'"
14
14
  ]
15
15
 
16
+ ESCAPE_CANDIDATES = [
17
+ "\\"
18
+ ]
19
+
20
+ NULL_STRING_CANDIDATES = [
21
+ "null",
22
+ "NULL",
23
+ "#N/A",
24
+ "\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
25
+ ]
26
+
16
27
  # CsvParserPlugin.TRUE_STRINGS
17
28
  TRUE_STRINGS = Hash[*%w[
18
29
  true True TRUE
@@ -35,6 +46,13 @@ module Embulk
35
46
  quote = guess_quote(sample_lines, delim)
36
47
  parser_guessed["quote"] = quote ? quote : ''
37
48
 
49
+ escape = guess_escape(sample_lines, delim, quote)
50
+ parser_guessed["escape"] = escape ? escape : ''
51
+
52
+ null_string = guess_null_string(sample_lines, delim)
53
+ parser_guessed["null_string"] = null_string if null_string
54
+ # don't even set null_string to avoid confusion of null and 'null' in YAML format
55
+
38
56
  sample_records = sample_lines.map {|line| line.split(delim) } # TODO use CsvTokenizer
39
57
  first_types = guess_field_types(sample_records[0, 1])
40
58
  other_types = guess_field_types(sample_records[1..-1])
@@ -119,6 +137,32 @@ module Embulk
119
137
  end
120
138
  end
121
139
 
140
+ def guess_escape(sample_lines, delim, optional_quote)
141
+ guessed = ESCAPE_CANDIDATES.map do |str|
142
+ if optional_quote
143
+ regexp = /#{Regexp.quote(str)}(?:#{Regexp.quote(delim)}|#{Regexp.quote(optional_quote)})/
144
+ else
145
+ regexp = /#{Regexp.quote(str)}#{Regexp.quote(delim)}/
146
+ end
147
+ counts = sample_lines.map {|line| line.scan(regexp).count }
148
+ count = counts.inject(0) {|r,c| r + c }
149
+ [str, count]
150
+ end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
151
+ found = guessed.first
152
+ return found ? found[0] : nil
153
+ end
154
+
155
+ def guess_null_string(sample_lines, delim)
156
+ guessed = NULL_STRING_CANDIDATES.map do |str|
157
+ regexp = /(?:^|#{Regexp.quote(delim)})#{Regexp.quote(str)}(?:$|#{Regexp.quote(delim)})/
158
+ counts = sample_lines.map {|line| line.scan(regexp).count }
159
+ count = counts.inject(0) {|r,c| r + c }
160
+ [str, count]
161
+ end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
162
+ found = guessed.first
163
+ return found ? found[0] : nil
164
+ end
165
+
122
166
  def guess_field_types(field_lines)
123
167
  column_lines = []
124
168
  field_lines.each do |fields|
@@ -231,7 +231,7 @@ module Embulk::Guess
231
231
  end
232
232
 
233
233
  if zm = /^#{TZ}$/.match(rest)
234
- delimiters << zm["zone_space"] || ''
234
+ delimiters << (zm["zone_space"] || '')
235
235
  if zm["z"]
236
236
  # TODO ISO 8601
237
237
  parts << :zone_off
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-16 00:00:00.000000000 Z
11
+ date: 2015-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -161,7 +161,6 @@ files:
161
161
  - embulk-core/src/main/java/org/embulk/plugin/PluginSource.java
162
162
  - embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java
163
163
  - embulk-core/src/main/java/org/embulk/plugin/PluginType.java
164
- - embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java
165
164
  - embulk-core/src/main/java/org/embulk/spi/Buffer.java
166
165
  - embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java
167
166
  - embulk-core/src/main/java/org/embulk/spi/Column.java
@@ -268,6 +267,7 @@ files:
268
267
  - embulk-docs/src/release/release-0.4.0.rst
269
268
  - embulk-docs/src/release/release-0.4.1.rst
270
269
  - embulk-docs/src/release/release-0.4.2.rst
270
+ - embulk-docs/src/release/release-0.4.3.rst
271
271
  - embulk-standards/build.gradle
272
272
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
273
273
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -362,8 +362,8 @@ files:
362
362
  - classpath/bval-jsr303-0.5.jar
363
363
  - classpath/commons-beanutils-core-1.8.3.jar
364
364
  - classpath/commons-lang3-3.1.jar
365
- - classpath/embulk-core-0.4.2.jar
366
- - classpath/embulk-standards-0.4.2.jar
365
+ - classpath/embulk-core-0.4.3.jar
366
+ - classpath/embulk-standards-0.4.3.jar
367
367
  - classpath/guava-18.0.jar
368
368
  - classpath/guice-3.0.jar
369
369
  - classpath/guice-multibindings-3.0.jar
@@ -1,19 +0,0 @@
1
- package org.embulk.plugin;
2
-
3
- public class SetThreadContextClassLoader
4
- implements AutoCloseable
5
- {
6
- private final ClassLoader original;
7
-
8
- public SetThreadContextClassLoader(ClassLoader classLoader)
9
- {
10
- this.original = Thread.currentThread().getContextClassLoader();
11
- Thread.currentThread().setContextClassLoader(classLoader);
12
- }
13
-
14
- @Override
15
- public void close()
16
- {
17
- Thread.currentThread().setContextClassLoader(original);
18
- }
19
- }